/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

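/* Illustrative sketch only (not part of the vectorizer proper): a caller
   that wants to account for, say, four unaligned loads of STMT_INFO in the
   loop body, assuming a cost vector COST_VEC and data reference DR are in
   scope, would do roughly

     unsigned estimate
       = record_stmt_cost (cost_vec, 4, unaligned_load, stmt_info,
			   DR_MISALIGNMENT (dr), vect_body);

   The entry is pushed onto COST_VEC for later processing by the target
   cost model; the return value is only a preliminary estimate.  */
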
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

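/* For illustration only (names are made up): given an array VECT_ARRAY of
   vectors created by create_vector_array, the two helpers above emit GIMPLE
   roughly of the form

     vectx_7 = vect_array[2];      <-- read_vector_array (..., vect_array, 2)
     vect_array[3] = vectx_7;      <-- write_vector_array (..., vectx_7,
						            vect_array, 3)

   inserted before *GSI as part of vectorizing STMT.  */
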
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

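/* Illustrative example (assumed, not from a testcase): in a loop like

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + x;
	 s += b[i];
       }

   the store to a[i] is marked vect_used_in_scope because it has a vdef
   (it alters memory), while the definition of s in the loop is marked
   live because, in loop-closed SSA form, it is used by the exit PHI
   outside the loop.  */
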
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (stmt, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (stmt, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (stmt, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &def_stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
      /* ??? We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
				 opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ??? This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
				       vec_promote_demote, stmt_info, 0,
				       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

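/* Illustration (assumed values, not from a testcase): for a scalar constant
   VAL of 5 and a four-element integer vector TYPE, the code above first
   builds the vector constant { 5, 5, 5, 5 } via build_vector_from_val and
   then emits, with GSI == NULL, something like

     cst__42 = { 5, 5, 5, 5 };

   in the loop preheader (the "cst__42" name is made up here; only the
   "cst_" prefix comes from vect_get_new_ssa_name), returning the new
   SSA name.  */
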
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

c83a894c
AH
1539/* Function vect_get_vec_def_for_operand.
1540
1541 OP is an operand in STMT. This function returns a (vector) def that will be
1542 used in the vectorized stmt for STMT.
1543
1544 In the case that OP is an SSA_NAME which is defined in the loop, then
1545 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1546
1547 In case OP is an invariant or constant, a new stmt that creates a vector def
1548 needs to be introduced. VECTYPE may be used to specify a required type for
 1549 the vector invariant. */
1550
1551tree
1552vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1553{
1554 gimple *def_stmt;
1555 enum vect_def_type dt;
1556 bool is_simple_use;
1557 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1558 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1559
1560 if (dump_enabled_p ())
1561 {
1562 dump_printf_loc (MSG_NOTE, vect_location,
1563 "vect_get_vec_def_for_operand: ");
1564 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1565 dump_printf (MSG_NOTE, "\n");
1566 }
1567
894dd753 1568 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt, &def_stmt);
c83a894c
AH
1569 gcc_assert (is_simple_use);
1570 if (def_stmt && dump_enabled_p ())
1571 {
1572 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1573 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1574 }
1575
1576 if (dt == vect_constant_def || dt == vect_external_def)
1577 {
1578 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1579 tree vector_type;
1580
1581 if (vectype)
1582 vector_type = vectype;
2568d8a1 1583 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1584 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1585 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1586 else
1587 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1588
1589 gcc_assert (vector_type);
1590 return vect_init_vector (stmt, op, vector_type, NULL);
1591 }
1592 else
1593 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1594}
1595
1596
ebfd146a
IR
1597/* Function vect_get_vec_def_for_stmt_copy
1598
ff802fa1 1599 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1600 vectorized stmt to be created (by the caller to this function) is a "copy"
1601 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1602 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1603 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1604 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1605 DT is the type of the vector def VEC_OPRND.
1606
1607 Context:
1608 In case the vectorization factor (VF) is bigger than the number
1609 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1610 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1611 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1612 smallest data-type determines the VF, and as a result, when vectorizing
1613 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1614 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1615 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1616 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1617 which VF=16 and nunits=4, so the number of copies required is 4):
1618
1619 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1620
ebfd146a
IR
1621 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1622 VS1.1: vx.1 = memref1 VS1.2
1623 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1624 VS1.3: vx.3 = memref3
ebfd146a
IR
1625
1626 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1627 VSnew.1: vz1 = vx.1 + ... VSnew.2
1628 VSnew.2: vz2 = vx.2 + ... VSnew.3
1629 VSnew.3: vz3 = vx.3 + ...
1630
1631 The vectorization of S1 is explained in vectorizable_load.
1632 The vectorization of S2:
b8698a0f
L
1633 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1634 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1635 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1636 returns the vector-def 'vx.0'.
1637
b8698a0f
L
1638 To create the remaining copies of the vector-stmt (VSnew.j), this
1639 function is called to get the relevant vector-def for each operand. It is
1640 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1641 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1642
b8698a0f
L
1643 For example, to obtain the vector-def 'vx.1' in order to create the
1644 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
 1645 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1646 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1647 and return its def ('vx.1').
1648 Overall, to create the above sequence this function will be called 3 times:
1649 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1650 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1651 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1652
1653tree
1654vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1655{
355fe088 1656 gimple *vec_stmt_for_operand;
ebfd146a
IR
1657 stmt_vec_info def_stmt_info;
1658
1659 /* Do nothing; can reuse same def. */
8644a673 1660 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1661 return vec_oprnd;
1662
1663 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1664 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1665 gcc_assert (def_stmt_info);
1666 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1667 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1668 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1669 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1670 else
1671 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1672 return vec_oprnd;
1673}
1674
1675
1676/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1677 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1678
c78e3652 1679void
b8698a0f 1680vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1681 vec<tree> *vec_oprnds0,
1682 vec<tree> *vec_oprnds1)
ebfd146a 1683{
9771b263 1684 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1685
1686 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1687 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1688
9771b263 1689 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1690 {
9771b263 1691 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1692 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1693 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1694 }
1695}
1696
1697
c78e3652 1698/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1699
c78e3652 1700void
355fe088 1701vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1702 vec<tree> *vec_oprnds0,
1703 vec<tree> *vec_oprnds1,
306b0c92 1704 slp_tree slp_node)
ebfd146a
IR
1705{
1706 if (slp_node)
d092494c
IR
1707 {
1708 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1709 auto_vec<tree> ops (nops);
1710 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1711
9771b263 1712 ops.quick_push (op0);
d092494c 1713 if (op1)
9771b263 1714 ops.quick_push (op1);
d092494c 1715
306b0c92 1716 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1717
37b5ec8f 1718 *vec_oprnds0 = vec_defs[0];
d092494c 1719 if (op1)
37b5ec8f 1720 *vec_oprnds1 = vec_defs[1];
d092494c 1721 }
ebfd146a
IR
1722 else
1723 {
1724 tree vec_oprnd;
1725
9771b263 1726 vec_oprnds0->create (1);
81c40241 1727 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1728 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1729
1730 if (op1)
1731 {
9771b263 1732 vec_oprnds1->create (1);
81c40241 1733 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1734 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1735 }
1736 }
1737}
1738
bb6c2b68
RS
1739/* Helper function called by vect_finish_replace_stmt and
1740 vect_finish_stmt_generation. Set the location of the new
1741 statement and create a stmt_vec_info for it. */
1742
1743static void
1744vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1745{
1746 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1747 vec_info *vinfo = stmt_info->vinfo;
1748
1749 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1750
1751 if (dump_enabled_p ())
1752 {
1753 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1754 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1755 }
1756
1757 gimple_set_location (vec_stmt, gimple_location (stmt));
1758
1759 /* While EH edges will generally prevent vectorization, stmt might
1760 e.g. be in a must-not-throw region. Ensure newly created stmts
1761 that could throw are part of the same region. */
1762 int lp_nr = lookup_stmt_eh_lp (stmt);
1763 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1764 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1765}
1766
1767/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1768 which sets the same scalar result as STMT did. */
1769
1770void
1771vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1772{
1773 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1774
1775 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1776 gsi_replace (&gsi, vec_stmt, false);
1777
1778 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1779}
ebfd146a
IR
1780
1781/* Function vect_finish_stmt_generation.
1782
1783 Insert a new stmt. */
1784
1785void
355fe088 1786vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1787 gimple_stmt_iterator *gsi)
1788{
ebfd146a
IR
1789 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1790
54e8e2c3
RG
1791 if (!gsi_end_p (*gsi)
1792 && gimple_has_mem_ops (vec_stmt))
1793 {
355fe088 1794 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1795 tree vuse = gimple_vuse (at_stmt);
1796 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1797 {
1798 tree vdef = gimple_vdef (at_stmt);
1799 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1800 /* If we have an SSA vuse and insert a store, update virtual
1801 SSA form to avoid triggering the renamer. Do so only
1802 if we can easily see all uses - which is what almost always
1803 happens with the way vectorized stmts are inserted. */
1804 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1805 && ((is_gimple_assign (vec_stmt)
1806 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1807 || (is_gimple_call (vec_stmt)
1808 && !(gimple_call_flags (vec_stmt)
1809 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1810 {
1811 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1812 gimple_set_vdef (vec_stmt, new_vdef);
1813 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1814 }
1815 }
1816 }
ebfd146a 1817 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
bb6c2b68 1818 vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1819}
1820
70439f0d
RS
1821/* We want to vectorize a call to combined function CFN with function
1822 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1823 as the types of all inputs. Check whether this is possible using
1824 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1825
70439f0d
RS
1826static internal_fn
1827vectorizable_internal_function (combined_fn cfn, tree fndecl,
1828 tree vectype_out, tree vectype_in)
ebfd146a 1829{
70439f0d
RS
1830 internal_fn ifn;
1831 if (internal_fn_p (cfn))
1832 ifn = as_internal_fn (cfn);
1833 else
1834 ifn = associated_internal_fn (fndecl);
1835 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1836 {
1837 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1838 if (info.vectorizable)
1839 {
1840 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1841 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1842 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1843 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1844 return ifn;
1845 }
1846 }
1847 return IFN_LAST;
ebfd146a
IR
1848}
1849
5ce9450f 1850
355fe088 1851static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1852 gimple_stmt_iterator *);
1853
7cfb4d93
RS
1854/* Check whether a load or store statement in the loop described by
1855 LOOP_VINFO is possible in a fully-masked loop. This is testing
1856 whether the vectorizer pass has the appropriate support, as well as
1857 whether the target does.
1858
1859 VLS_TYPE says whether the statement is a load or store and VECTYPE
1860 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1861 says how the load or store is going to be implemented and GROUP_SIZE
1862 is the number of load or store statements in the containing group.
bfaa08b7
RS
1863 If the access is a gather load or scatter store, GS_INFO describes
1864 its arguments.
7cfb4d93
RS
1865
1866 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1867 supported, otherwise record the required mask types. */
1868
1869static void
1870check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1871 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1872 vect_memory_access_type memory_access_type,
1873 gather_scatter_info *gs_info)
7cfb4d93
RS
1874{
1875 /* Invariant loads need no special support. */
1876 if (memory_access_type == VMAT_INVARIANT)
1877 return;
1878
1879 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1880 machine_mode vecmode = TYPE_MODE (vectype);
1881 bool is_load = (vls_type == VLS_LOAD);
1882 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1883 {
1884 if (is_load
1885 ? !vect_load_lanes_supported (vectype, group_size, true)
1886 : !vect_store_lanes_supported (vectype, group_size, true))
1887 {
1888 if (dump_enabled_p ())
1889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1890 "can't use a fully-masked loop because the"
1891 " target doesn't have an appropriate masked"
1892 " load/store-lanes instruction.\n");
1893 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1894 return;
1895 }
1896 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1897 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1898 return;
1899 }
1900
bfaa08b7
RS
1901 if (memory_access_type == VMAT_GATHER_SCATTER)
1902 {
f307441a
RS
1903 internal_fn ifn = (is_load
1904 ? IFN_MASK_GATHER_LOAD
1905 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1906 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1907 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1908 gs_info->memory_type,
1909 TYPE_SIGN (offset_type),
1910 gs_info->scale))
1911 {
1912 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1914 "can't use a fully-masked loop because the"
1915 " target doesn't have an appropriate masked"
f307441a 1916 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1917 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1918 return;
1919 }
1920 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1921 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1922 return;
1923 }
1924
7cfb4d93
RS
1925 if (memory_access_type != VMAT_CONTIGUOUS
1926 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1927 {
1928 /* Element X of the data must come from iteration i * VF + X of the
1929 scalar loop. We need more work to support other mappings. */
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932 "can't use a fully-masked loop because an access"
1933 " isn't contiguous.\n");
1934 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1935 return;
1936 }
1937
1938 machine_mode mask_mode;
1939 if (!(targetm.vectorize.get_mask_mode
1940 (GET_MODE_NUNITS (vecmode),
1941 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1942 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1943 {
1944 if (dump_enabled_p ())
1945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1946 "can't use a fully-masked loop because the target"
1947 " doesn't have the appropriate masked load or"
1948 " store.\n");
1949 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1950 return;
1951 }
1952 /* We might load more scalars than we need for permuting SLP loads.
1953 We checked in get_group_load_store_type that the extra elements
1954 don't leak into a new vector. */
1955 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1956 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1957 unsigned int nvectors;
1958 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1959 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1960 else
1961 gcc_unreachable ();
1962}
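
/* An illustrative, standalone sketch (not part of GCC) of the last step
   above: the number of loop masks to record is the number of vectors needed
   to cover GROUP_SIZE * VF scalar elements, i.e. the division rounded away
   from zero.  The helper name and all concrete numbers are made up for the
   example.  */
#include <stdio.h>

static unsigned int
div_away_from_zero (unsigned int a, unsigned int b)
{
  return (a + b - 1) / b;	/* ceiling division for positive values  */
}

int
main (void)
{
  unsigned int group_size = 3, vf = 8, nunits = 4;
  unsigned int nvectors = div_away_from_zero (group_size * vf, nunits);
  printf ("masks to record: %u\n", nvectors);	/* prints 6  */
  return 0;
}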
1963
1964/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1965 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1966 that needs to be applied to all loads and stores in a vectorized loop.
1967 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1968
1969 MASK_TYPE is the type of both masks. If new statements are needed,
1970 insert them before GSI. */
1971
1972static tree
1973prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1974 gimple_stmt_iterator *gsi)
1975{
1976 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1977 if (!loop_mask)
1978 return vec_mask;
1979
1980 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1981 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1982 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1983 vec_mask, loop_mask);
1984 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1985 return and_res;
1986}
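
/* An illustrative, standalone sketch (not part of GCC) of the combination
   performed above: in a fully-masked loop the mask actually applied to a
   load or store is the bitwise AND of the loop mask (which switches off the
   lanes beyond the last scalar iteration) and the vectorized condition mask.
   The bit patterns below are arbitrary examples.  */
#include <stdio.h>

int
main (void)
{
  unsigned char loop_mask = 0x0f;	/* only the low four lanes are live  */
  unsigned char vec_mask = 0x3a;	/* lanes where the scalar condition holds  */
  unsigned char combined = loop_mask & vec_mask;
  printf ("combined mask: 0x%02x\n", combined);	/* prints 0x0a  */
  return 0;
}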
1987
429ef523
RS
1988/* Determine whether we can use a gather load or scatter store to vectorize
1989 strided load or store STMT by truncating the current offset to a smaller
1990 width. We need to be able to construct an offset vector:
1991
1992 { 0, X, X*2, X*3, ... }
1993
1994 without loss of precision, where X is STMT's DR_STEP.
1995
1996 Return true if this is possible, describing the gather load or scatter
1997 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1998
1999static bool
2000vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
2001 bool masked_p,
2002 gather_scatter_info *gs_info)
2003{
2004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2005 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2006 tree step = DR_STEP (dr);
2007 if (TREE_CODE (step) != INTEGER_CST)
2008 {
2009 /* ??? Perhaps we could use range information here? */
2010 if (dump_enabled_p ())
2011 dump_printf_loc (MSG_NOTE, vect_location,
2012 "cannot truncate variable step.\n");
2013 return false;
2014 }
2015
2016 /* Get the number of bits in an element. */
2017 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2018 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2019 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2020
2021 /* Set COUNT to the upper limit on the number of elements - 1.
2022 Start with the maximum vectorization factor. */
2023 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2024
2025 /* Try lowering COUNT to the number of scalar latch iterations. */
2026 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2027 widest_int max_iters;
2028 if (max_loop_iterations (loop, &max_iters)
2029 && max_iters < count)
2030 count = max_iters.to_shwi ();
2031
2032 /* Try scales of 1 and the element size. */
2033 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2034 bool overflow_p = false;
2035 for (int i = 0; i < 2; ++i)
2036 {
2037 int scale = scales[i];
2038 widest_int factor;
2039 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2040 continue;
2041
2042 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
 2043 in ELEMENT_BITS bits. */
2044 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
2045 if (overflow_p)
2046 continue;
2047 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2048 if (wi::min_precision (range, sign) > element_bits)
2049 {
2050 overflow_p = true;
2051 continue;
2052 }
2053
2054 /* See whether the target supports the operation. */
2055 tree memory_type = TREE_TYPE (DR_REF (dr));
2056 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2057 memory_type, element_bits, sign, scale,
2058 &gs_info->ifn, &gs_info->element_type))
2059 continue;
2060
2061 tree offset_type = build_nonstandard_integer_type (element_bits,
2062 sign == UNSIGNED);
2063
2064 gs_info->decl = NULL_TREE;
2065 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2066 but we don't need to store that here. */
2067 gs_info->base = NULL_TREE;
2068 gs_info->offset = fold_convert (offset_type, step);
929b4411 2069 gs_info->offset_dt = vect_constant_def;
429ef523
RS
2070 gs_info->offset_vectype = NULL_TREE;
2071 gs_info->scale = scale;
2072 gs_info->memory_type = memory_type;
2073 return true;
2074 }
2075
2076 if (overflow_p && dump_enabled_p ())
2077 dump_printf_loc (MSG_NOTE, vect_location,
2078 "truncating gather/scatter offset to %d bits"
2079 " might change its value.\n", element_bits);
2080
2081 return false;
2082}
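
/* An illustrative, standalone sketch (not part of GCC) of the range check
   above: for a candidate SCALE, truncating the offset is only safe if
   COUNT * (STEP / SCALE) still fits in an ELEMENT_BITS-wide signed offset.
   The helper is hypothetical; it assumes ELEMENT_BITS < 64 and that the
   multiplication does not overflow int64_t, and the numbers are made up.  */
#include <stdio.h>
#include <stdint.h>

static int
truncated_offset_fits_p (int64_t count, int64_t step, int64_t scale,
			 unsigned int element_bits)
{
  if (scale == 0 || step % scale != 0)
    return 0;				/* STEP must be a multiple of SCALE  */
  int64_t factor = step / scale;
  int64_t range = count * factor;
  int64_t limit = (int64_t) 1 << (element_bits - 1);
  return range >= -limit && range < limit;
}

int
main (void)
{
  /* At most 255 further iterations, a byte step of 12, scale 4,
     32-bit offset elements.  */
  printf ("%s\n", truncated_offset_fits_p (255, 12, 4, 32)
	  ? "can truncate" : "cannot truncate");
  return 0;
}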
2083
ab2fc782
RS
2084/* Return true if we can use gather/scatter internal functions to
2085 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
2086 MASKED_P is true if load or store is conditional. When returning
2087 true, fill in GS_INFO with the information required to perform the
2088 operation. */
ab2fc782
RS
2089
2090static bool
2091vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 2092 bool masked_p,
ab2fc782
RS
2093 gather_scatter_info *gs_info)
2094{
2095 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2096 || gs_info->decl)
429ef523
RS
2097 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2098 masked_p, gs_info);
ab2fc782
RS
2099
2100 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2101 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2102 tree offset_type = TREE_TYPE (gs_info->offset);
2103 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2104
2105 /* Enforced by vect_check_gather_scatter. */
2106 gcc_assert (element_bits >= offset_bits);
2107
2108 /* If the elements are wider than the offset, convert the offset to the
2109 same width, without changing its sign. */
2110 if (element_bits > offset_bits)
2111 {
2112 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2113 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2114 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2115 }
2116
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_NOTE, vect_location,
2119 "using gather/scatter for strided/grouped access,"
2120 " scale = %d\n", gs_info->scale);
2121
2122 return true;
2123}
2124
62da9e14
RS
2125/* STMT is a non-strided load or store, meaning that it accesses
2126 elements with a known constant step. Return -1 if that step
2127 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2128
2129static int
2130compare_step_with_zero (gimple *stmt)
2131{
2132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
2133 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2134 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2135 size_zero_node);
62da9e14
RS
2136}
2137
2138/* If the target supports a permute mask that reverses the elements in
2139 a vector of type VECTYPE, return that mask, otherwise return null. */
2140
2141static tree
2142perm_mask_for_reverse (tree vectype)
2143{
928686b1 2144 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2145
d980067b
RS
2146 /* The encoding has a single stepped pattern. */
2147 vec_perm_builder sel (nunits, 1, 3);
928686b1 2148 for (int i = 0; i < 3; ++i)
908a1a16 2149 sel.quick_push (nunits - 1 - i);
62da9e14 2150
e3342de4
RS
2151 vec_perm_indices indices (sel, 1, nunits);
2152 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2153 return NULL_TREE;
e3342de4 2154 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2155}
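
/* An illustrative, standalone sketch (not part of GCC) of the selector built
   above: { N-1, N-2, ..., 0 } applied through a permutation reverses the N
   elements of a vector, which is what a contiguous access with a negative
   step needs.  N = 8 is just an example value.  */
#include <stdio.h>

int
main (void)
{
  enum { N = 8 };
  int v[N] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  int sel[N], out[N];

  for (int i = 0; i < N; ++i)
    sel[i] = N - 1 - i;			/* the reversing selector  */
  for (int i = 0; i < N; ++i)
    out[i] = v[sel[i]];			/* out = VEC_PERM_EXPR <v, v, sel>  */

  for (int i = 0; i < N; ++i)
    printf ("%d ", out[i]);		/* prints 17 16 15 14 13 12 11 10  */
  printf ("\n");
  return 0;
}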
5ce9450f 2156
c3a8f964
RS
2157/* STMT is either a masked or unconditional store. Return the value
2158 being stored. */
2159
f307441a 2160tree
c3a8f964
RS
2161vect_get_store_rhs (gimple *stmt)
2162{
2163 if (gassign *assign = dyn_cast <gassign *> (stmt))
2164 {
2165 gcc_assert (gimple_assign_single_p (assign));
2166 return gimple_assign_rhs1 (assign);
2167 }
2168 if (gcall *call = dyn_cast <gcall *> (stmt))
2169 {
2170 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2171 int index = internal_fn_stored_value_index (ifn);
2172 gcc_assert (index >= 0);
2173 return gimple_call_arg (stmt, index);
c3a8f964
RS
2174 }
2175 gcc_unreachable ();
2176}
2177
2de001ee
RS
2178/* A subroutine of get_load_store_type, with a subset of the same
2179 arguments. Handle the case where STMT is part of a grouped load
2180 or store.
2181
2182 For stores, the statements in the group are all consecutive
2183 and there is no gap at the end. For loads, the statements in the
2184 group might not be consecutive; there can be gaps between statements
2185 as well as at the end. */
2186
2187static bool
2188get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2189 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2190 vect_memory_access_type *memory_access_type,
2191 gather_scatter_info *gs_info)
2de001ee
RS
2192{
2193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2194 vec_info *vinfo = stmt_info->vinfo;
2195 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2196 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2c53b149 2197 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 2198 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 2199 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2de001ee 2200 bool single_element_p = (stmt == first_stmt
2c53b149
RB
2201 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2202 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 2203 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2204
2205 /* True if the vectorized statements would access beyond the last
2206 statement in the group. */
2207 bool overrun_p = false;
2208
2209 /* True if we can cope with such overrun by peeling for gaps, so that
2210 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2211 bool can_overrun_p = (!masked_p
2212 && vls_type == VLS_LOAD
2213 && loop_vinfo
2214 && !loop->inner);
2de001ee
RS
2215
2216 /* There can only be a gap at the end of the group if the stride is
2217 known at compile time. */
2218 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2219
2220 /* Stores can't yet have gaps. */
2221 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2222
2223 if (slp)
2224 {
2225 if (STMT_VINFO_STRIDED_P (stmt_info))
2226 {
2c53b149 2227 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2228 separated by the stride, until we have a complete vector.
2229 Fall back to scalar accesses if that isn't possible. */
928686b1 2230 if (multiple_p (nunits, group_size))
2de001ee
RS
2231 *memory_access_type = VMAT_STRIDED_SLP;
2232 else
2233 *memory_access_type = VMAT_ELEMENTWISE;
2234 }
2235 else
2236 {
2237 overrun_p = loop_vinfo && gap != 0;
2238 if (overrun_p && vls_type != VLS_LOAD)
2239 {
2240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2241 "Grouped store with gaps requires"
2242 " non-consecutive accesses\n");
2243 return false;
2244 }
f702e7d4
RS
2245 /* An overrun is fine if the trailing elements are smaller
2246 than the alignment boundary B. Every vector access will
2247 be a multiple of B and so we are guaranteed to access a
2248 non-gap element in the same B-sized block. */
f9ef2c76 2249 if (overrun_p
f702e7d4
RS
2250 && gap < (vect_known_alignment_in_bytes (first_dr)
2251 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2252 overrun_p = false;
2de001ee
RS
2253 if (overrun_p && !can_overrun_p)
2254 {
2255 if (dump_enabled_p ())
2256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2257 "Peeling for outer loop is not supported\n");
2258 return false;
2259 }
2260 *memory_access_type = VMAT_CONTIGUOUS;
2261 }
2262 }
2263 else
2264 {
2265 /* We can always handle this case using elementwise accesses,
2266 but see if something more efficient is available. */
2267 *memory_access_type = VMAT_ELEMENTWISE;
2268
2269 /* If there is a gap at the end of the group then these optimizations
2270 would access excess elements in the last iteration. */
2271 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2272 /* An overrun is fine if the trailing elements are smaller than the
2273 alignment boundary B. Every vector access will be a multiple of B
2274 and so we are guaranteed to access a non-gap element in the
2275 same B-sized block. */
f9ef2c76 2276 if (would_overrun_p
7e11fc7f 2277 && !masked_p
f702e7d4
RS
2278 && gap < (vect_known_alignment_in_bytes (first_dr)
2279 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2280 would_overrun_p = false;
f702e7d4 2281
2de001ee 2282 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2283 && (can_overrun_p || !would_overrun_p)
2284 && compare_step_with_zero (stmt) > 0)
2de001ee 2285 {
6737facb
RS
2286 /* First cope with the degenerate case of a single-element
2287 vector. */
2288 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2289 *memory_access_type = VMAT_CONTIGUOUS;
2290
2291 /* Otherwise try using LOAD/STORE_LANES. */
2292 if (*memory_access_type == VMAT_ELEMENTWISE
2293 && (vls_type == VLS_LOAD
7e11fc7f
RS
2294 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2295 : vect_store_lanes_supported (vectype, group_size,
2296 masked_p)))
2de001ee
RS
2297 {
2298 *memory_access_type = VMAT_LOAD_STORE_LANES;
2299 overrun_p = would_overrun_p;
2300 }
2301
2302 /* If that fails, try using permuting loads. */
2303 if (*memory_access_type == VMAT_ELEMENTWISE
2304 && (vls_type == VLS_LOAD
2305 ? vect_grouped_load_supported (vectype, single_element_p,
2306 group_size)
2307 : vect_grouped_store_supported (vectype, group_size)))
2308 {
2309 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2310 overrun_p = would_overrun_p;
2311 }
2312 }
429ef523
RS
2313
 2314 /* As a last resort, try using a gather load or scatter store.
2315
2316 ??? Although the code can handle all group sizes correctly,
2317 it probably isn't a win to use separate strided accesses based
2318 on nearby locations. Or, even if it's a win over scalar code,
2319 it might not be a win over vectorizing at a lower VF, if that
2320 allows us to use contiguous accesses. */
2321 if (*memory_access_type == VMAT_ELEMENTWISE
2322 && single_element_p
2323 && loop_vinfo
2324 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2325 masked_p, gs_info))
2326 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2327 }
2328
2329 if (vls_type != VLS_LOAD && first_stmt == stmt)
2330 {
2331 /* STMT is the leader of the group. Check the operands of all the
2332 stmts of the group. */
2c53b149 2333 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2de001ee
RS
2334 while (next_stmt)
2335 {
7e11fc7f 2336 tree op = vect_get_store_rhs (next_stmt);
2de001ee 2337 enum vect_def_type dt;
894dd753 2338 if (!vect_is_simple_use (op, vinfo, &dt))
2de001ee
RS
2339 {
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2342 "use not simple.\n");
2343 return false;
2344 }
2c53b149 2345 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2de001ee
RS
2346 }
2347 }
2348
2349 if (overrun_p)
2350 {
2351 gcc_assert (can_overrun_p);
2352 if (dump_enabled_p ())
2353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2354 "Data access with gaps requires scalar "
2355 "epilogue loop\n");
2356 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2357 }
2358
2359 return true;
2360}
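
/* An illustrative, standalone sketch (not part of GCC) of the gap test used
   above: reading past the last element of a group is harmless when the gap
   (counted in elements) is smaller than the known alignment divided by the
   element size, because each vector access then stays within one aligned
   block that still contains a wanted element.  The numbers are made up.  */
#include <stdio.h>

int
main (void)
{
  unsigned int gap = 1;			/* trailing elements missing from the group  */
  unsigned int align_bytes = 16;	/* known alignment of the first access  */
  unsigned int elem_bytes = 4;		/* size of one scalar element  */

  if (gap < align_bytes / elem_bytes)
    printf ("overrun is fine\n");
  else
    printf ("needs peeling for gaps\n");
  return 0;
}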
2361
62da9e14
RS
2362/* A subroutine of get_load_store_type, with a subset of the same
2363 arguments. Handle the case where STMT is a load or store that
2364 accesses consecutive elements with a negative step. */
2365
2366static vect_memory_access_type
2367get_negative_load_store_type (gimple *stmt, tree vectype,
2368 vec_load_store_type vls_type,
2369 unsigned int ncopies)
2370{
2371 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2372 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2373 dr_alignment_support alignment_support_scheme;
2374
2375 if (ncopies > 1)
2376 {
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2379 "multiple types with negative step.\n");
2380 return VMAT_ELEMENTWISE;
2381 }
2382
2383 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2384 if (alignment_support_scheme != dr_aligned
2385 && alignment_support_scheme != dr_unaligned_supported)
2386 {
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2389 "negative step but alignment required.\n");
2390 return VMAT_ELEMENTWISE;
2391 }
2392
2393 if (vls_type == VLS_STORE_INVARIANT)
2394 {
2395 if (dump_enabled_p ())
2396 dump_printf_loc (MSG_NOTE, vect_location,
2397 "negative step with invariant source;"
2398 " no permute needed.\n");
2399 return VMAT_CONTIGUOUS_DOWN;
2400 }
2401
2402 if (!perm_mask_for_reverse (vectype))
2403 {
2404 if (dump_enabled_p ())
2405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2406 "negative step and reversing not supported.\n");
2407 return VMAT_ELEMENTWISE;
2408 }
2409
2410 return VMAT_CONTIGUOUS_REVERSE;
2411}
2412
2de001ee
RS
2413/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2414 if there is a memory access type that the vectorized form can use,
2415 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2416 or scatters, fill in GS_INFO accordingly.
2417
2418 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2419 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2420 VECTYPE is the vector type that the vectorized statements will use.
2421 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2422
2423static bool
7e11fc7f 2424get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2425 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2426 vect_memory_access_type *memory_access_type,
2427 gather_scatter_info *gs_info)
2428{
2429 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2430 vec_info *vinfo = stmt_info->vinfo;
2431 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2432 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2433 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2434 {
2435 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2436 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2437 gcc_unreachable ();
894dd753 2438 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2de001ee
RS
2439 &gs_info->offset_dt,
2440 &gs_info->offset_vectype))
2441 {
2442 if (dump_enabled_p ())
2443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2444 "%s index use not simple.\n",
2445 vls_type == VLS_LOAD ? "gather" : "scatter");
2446 return false;
2447 }
2448 }
2449 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2450 {
7e11fc7f 2451 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2452 memory_access_type, gs_info))
2de001ee
RS
2453 return false;
2454 }
2455 else if (STMT_VINFO_STRIDED_P (stmt_info))
2456 {
2457 gcc_assert (!slp);
ab2fc782 2458 if (loop_vinfo
429ef523
RS
2459 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2460 masked_p, gs_info))
ab2fc782
RS
2461 *memory_access_type = VMAT_GATHER_SCATTER;
2462 else
2463 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2464 }
2465 else
62da9e14
RS
2466 {
2467 int cmp = compare_step_with_zero (stmt);
2468 if (cmp < 0)
2469 *memory_access_type = get_negative_load_store_type
2470 (stmt, vectype, vls_type, ncopies);
2471 else if (cmp == 0)
2472 {
2473 gcc_assert (vls_type == VLS_LOAD);
2474 *memory_access_type = VMAT_INVARIANT;
2475 }
2476 else
2477 *memory_access_type = VMAT_CONTIGUOUS;
2478 }
2de001ee 2479
4d694b27
RS
2480 if ((*memory_access_type == VMAT_ELEMENTWISE
2481 || *memory_access_type == VMAT_STRIDED_SLP)
2482 && !nunits.is_constant ())
2483 {
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "Not using elementwise accesses due to variable "
2487 "vectorization factor.\n");
2488 return false;
2489 }
2490
2de001ee
RS
2491 /* FIXME: At the moment the cost model seems to underestimate the
2492 cost of using elementwise accesses. This check preserves the
2493 traditional behavior until that can be fixed. */
2494 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8 2495 && !STMT_VINFO_STRIDED_P (stmt_info)
2c53b149
RB
2496 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2497 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2498 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2499 {
2500 if (dump_enabled_p ())
2501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502 "not falling back to elementwise accesses\n");
2503 return false;
2504 }
2505 return true;
2506}
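
/* An illustrative, standalone sketch (not part of GCC) of the step-sign
   classification above for a plain (non-grouped, non-strided) access: a
   negative step becomes a reversed contiguous access (or an elementwise
   fallback), a zero step is an invariant load, and a positive step is an
   ordinary contiguous access.  The helper is hypothetical.  */
#include <stdio.h>

static const char *
classify_step (long step)
{
  if (step < 0)
    return "contiguous reverse (or elementwise fallback)";
  if (step == 0)
    return "invariant (loads only)";
  return "contiguous";
}

int
main (void)
{
  printf ("step -4: %s\n", classify_step (-4));
  printf ("step  0: %s\n", classify_step (0));
  printf ("step  8: %s\n", classify_step (8));
  return 0;
}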
2507
aaeefd88 2508/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2509 conditional load or store STMT. When returning true, store the type
2510 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2511 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2512
2513static bool
929b4411
RS
2514vect_check_load_store_mask (gimple *stmt, tree mask,
2515 vect_def_type *mask_dt_out,
2516 tree *mask_vectype_out)
aaeefd88
RS
2517{
2518 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2519 {
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522 "mask argument is not a boolean.\n");
2523 return false;
2524 }
2525
2526 if (TREE_CODE (mask) != SSA_NAME)
2527 {
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2530 "mask argument is not an SSA name.\n");
2531 return false;
2532 }
2533
2534 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2535 enum vect_def_type mask_dt;
aaeefd88 2536 tree mask_vectype;
894dd753 2537 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
aaeefd88
RS
2538 {
2539 if (dump_enabled_p ())
2540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2541 "mask use not simple.\n");
2542 return false;
2543 }
2544
2545 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2546 if (!mask_vectype)
2547 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2548
2549 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2550 {
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2553 "could not find an appropriate vector mask type.\n");
2554 return false;
2555 }
2556
2557 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2558 TYPE_VECTOR_SUBPARTS (vectype)))
2559 {
2560 if (dump_enabled_p ())
2561 {
2562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2563 "vector mask type ");
2564 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2565 dump_printf (MSG_MISSED_OPTIMIZATION,
2566 " does not match vector data type ");
2567 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2568 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2569 }
2570 return false;
2571 }
2572
929b4411 2573 *mask_dt_out = mask_dt;
aaeefd88
RS
2574 *mask_vectype_out = mask_vectype;
2575 return true;
2576}
2577
3133c3b6
RS
2578/* Return true if stored value RHS is suitable for vectorizing store
2579 statement STMT. When returning true, store the type of the
929b4411
RS
2580 definition in *RHS_DT_OUT, the type of the vectorized store value in
2581 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2582
2583static bool
929b4411
RS
2584vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2585 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2586{
2587 /* In the case this is a store from a constant make sure
2588 native_encode_expr can handle it. */
2589 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2590 {
2591 if (dump_enabled_p ())
2592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2593 "cannot encode constant as a byte sequence.\n");
2594 return false;
2595 }
2596
2597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2598 enum vect_def_type rhs_dt;
3133c3b6 2599 tree rhs_vectype;
894dd753 2600 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
3133c3b6
RS
2601 {
2602 if (dump_enabled_p ())
2603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2604 "use not simple.\n");
2605 return false;
2606 }
2607
2608 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2609 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2610 {
2611 if (dump_enabled_p ())
2612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2613 "incompatible vector types.\n");
2614 return false;
2615 }
2616
929b4411 2617 *rhs_dt_out = rhs_dt;
3133c3b6 2618 *rhs_vectype_out = rhs_vectype;
929b4411 2619 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2620 *vls_type_out = VLS_STORE_INVARIANT;
2621 else
2622 *vls_type_out = VLS_STORE;
2623 return true;
2624}
2625
bc9587eb
RS
2626/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2627 Note that we support masks with floating-point type, in which case the
2628 floats are interpreted as a bitmask. */
2629
2630static tree
2631vect_build_all_ones_mask (gimple *stmt, tree masktype)
2632{
2633 if (TREE_CODE (masktype) == INTEGER_TYPE)
2634 return build_int_cst (masktype, -1);
2635 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2636 {
2637 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2638 mask = build_vector_from_val (masktype, mask);
2639 return vect_init_vector (stmt, mask, masktype, NULL);
2640 }
2641 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2642 {
2643 REAL_VALUE_TYPE r;
2644 long tmp[6];
2645 for (int j = 0; j < 6; ++j)
2646 tmp[j] = -1;
2647 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2648 tree mask = build_real (TREE_TYPE (masktype), r);
2649 mask = build_vector_from_val (masktype, mask);
2650 return vect_init_vector (stmt, mask, masktype, NULL);
2651 }
2652 gcc_unreachable ();
2653}
2654
2655/* Build an all-zero merge value of type VECTYPE while vectorizing
2656 STMT as a gather load. */
2657
2658static tree
2659vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2660{
2661 tree merge;
2662 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2663 merge = build_int_cst (TREE_TYPE (vectype), 0);
2664 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2665 {
2666 REAL_VALUE_TYPE r;
2667 long tmp[6];
2668 for (int j = 0; j < 6; ++j)
2669 tmp[j] = 0;
2670 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2671 merge = build_real (TREE_TYPE (vectype), r);
2672 }
2673 else
2674 gcc_unreachable ();
2675 merge = build_vector_from_val (vectype, merge);
2676 return vect_init_vector (stmt, merge, vectype, NULL);
2677}
2678
c48d2d35
RS
2679/* Build a gather load call while vectorizing STMT. Insert new instructions
2680 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2681 operation. If the load is conditional, MASK is the unvectorized
929b4411 2682 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2683
2684static void
2685vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2686 gimple **vec_stmt, gather_scatter_info *gs_info,
929b4411 2687 tree mask, vect_def_type mask_dt)
c48d2d35
RS
2688{
2689 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2690 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2691 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2692 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2693 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2694 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2695 edge pe = loop_preheader_edge (loop);
2696 enum { NARROW, NONE, WIDEN } modifier;
2697 poly_uint64 gather_off_nunits
2698 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2699
2700 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2701 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2702 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2703 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2704 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2705 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2706 tree scaletype = TREE_VALUE (arglist);
2707 gcc_checking_assert (types_compatible_p (srctype, rettype)
2708 && (!mask || types_compatible_p (srctype, masktype)));
2709
2710 tree perm_mask = NULL_TREE;
2711 tree mask_perm_mask = NULL_TREE;
2712 if (known_eq (nunits, gather_off_nunits))
2713 modifier = NONE;
2714 else if (known_eq (nunits * 2, gather_off_nunits))
2715 {
2716 modifier = WIDEN;
2717
2718 /* Currently widening gathers and scatters are only supported for
2719 fixed-length vectors. */
2720 int count = gather_off_nunits.to_constant ();
2721 vec_perm_builder sel (count, count, 1);
2722 for (int i = 0; i < count; ++i)
2723 sel.quick_push (i | (count / 2));
2724
2725 vec_perm_indices indices (sel, 1, count);
2726 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2727 indices);
2728 }
2729 else if (known_eq (nunits, gather_off_nunits * 2))
2730 {
2731 modifier = NARROW;
2732
2733 /* Currently narrowing gathers and scatters are only supported for
2734 fixed-length vectors. */
2735 int count = nunits.to_constant ();
2736 vec_perm_builder sel (count, count, 1);
2737 sel.quick_grow (count);
2738 for (int i = 0; i < count; ++i)
2739 sel[i] = i < count / 2 ? i : i + count / 2;
2740 vec_perm_indices indices (sel, 2, count);
2741 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2742
2743 ncopies *= 2;
2744
2745 if (mask)
2746 {
2747 for (int i = 0; i < count; ++i)
2748 sel[i] = i | (count / 2);
2749 indices.new_vector (sel, 2, count);
2750 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2751 }
2752 }
2753 else
2754 gcc_unreachable ();
2755
2756 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2757 vectype);
2758
2759 tree ptr = fold_convert (ptrtype, gs_info->base);
2760 if (!is_gimple_min_invariant (ptr))
2761 {
2762 gimple_seq seq;
2763 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2764 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2765 gcc_assert (!new_bb);
2766 }
2767
2768 tree scale = build_int_cst (scaletype, gs_info->scale);
2769
2770 tree vec_oprnd0 = NULL_TREE;
2771 tree vec_mask = NULL_TREE;
2772 tree src_op = NULL_TREE;
2773 tree mask_op = NULL_TREE;
2774 tree prev_res = NULL_TREE;
2775 stmt_vec_info prev_stmt_info = NULL;
2776
2777 if (!mask)
2778 {
2779 src_op = vect_build_zero_merge_argument (stmt, rettype);
2780 mask_op = vect_build_all_ones_mask (stmt, masktype);
2781 }
2782
2783 for (int j = 0; j < ncopies; ++j)
2784 {
2785 tree op, var;
2786 gimple *new_stmt;
2787 if (modifier == WIDEN && (j & 1))
2788 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2789 perm_mask, stmt, gsi);
2790 else if (j == 0)
2791 op = vec_oprnd0
2792 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2793 else
2794 op = vec_oprnd0
2795 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2796
2797 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2798 {
2799 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2800 TYPE_VECTOR_SUBPARTS (idxtype)));
2801 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2802 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2803 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2804 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2805 op = var;
2806 }
2807
2808 if (mask)
2809 {
2810 if (mask_perm_mask && (j & 1))
2811 mask_op = permute_vec_elements (mask_op, mask_op,
2812 mask_perm_mask, stmt, gsi);
2813 else
2814 {
2815 if (j == 0)
2816 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2817 else
929b4411 2818 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2819
2820 mask_op = vec_mask;
2821 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2822 {
2823 gcc_assert
2824 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2825 TYPE_VECTOR_SUBPARTS (masktype)));
2826 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2827 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2828 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2829 mask_op);
2830 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2831 mask_op = var;
2832 }
2833 }
2834 src_op = mask_op;
2835 }
2836
2837 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2838 mask_op, scale);
2839
2840 if (!useless_type_conversion_p (vectype, rettype))
2841 {
2842 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2843 TYPE_VECTOR_SUBPARTS (rettype)));
2844 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2845 gimple_call_set_lhs (new_stmt, op);
2846 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2847 var = make_ssa_name (vec_dest);
2848 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2849 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2850 }
2851 else
2852 {
2853 var = make_ssa_name (vec_dest, new_stmt);
2854 gimple_call_set_lhs (new_stmt, var);
2855 }
2856
2857 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2858
2859 if (modifier == NARROW)
2860 {
2861 if ((j & 1) == 0)
2862 {
2863 prev_res = var;
2864 continue;
2865 }
2866 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2867 new_stmt = SSA_NAME_DEF_STMT (var);
2868 }
2869
2870 if (prev_stmt_info == NULL)
2871 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2872 else
2873 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2874 prev_stmt_info = vinfo_for_stmt (new_stmt);
2875 }
2876}
2877
bfaa08b7
RS
2878/* Prepare the base and offset in GS_INFO for vectorization.
2879 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2880 to the vectorized offset argument for the first copy of STMT. STMT
2881 is the statement described by GS_INFO and LOOP is the containing loop. */
2882
2883static void
2884vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2885 gather_scatter_info *gs_info,
2886 tree *dataref_ptr, tree *vec_offset)
2887{
2888 gimple_seq stmts = NULL;
2889 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2890 if (stmts != NULL)
2891 {
2892 basic_block new_bb;
2893 edge pe = loop_preheader_edge (loop);
2894 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2895 gcc_assert (!new_bb);
2896 }
2897 tree offset_type = TREE_TYPE (gs_info->offset);
2898 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2899 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2900 offset_vectype);
2901}
2902
ab2fc782
RS
2903/* Prepare to implement a grouped or strided load or store using
2904 the gather load or scatter store operation described by GS_INFO.
2905 STMT is the load or store statement.
2906
2907 Set *DATAREF_BUMP to the amount that should be added to the base
2908 address after each copy of the vectorized statement. Set *VEC_OFFSET
2909 to an invariant offset vector in which element I has the value
2910 I * DR_STEP / SCALE. */
2911
2912static void
2913vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2914 gather_scatter_info *gs_info,
2915 tree *dataref_bump, tree *vec_offset)
2916{
2917 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2918 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2919 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2920 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2921 gimple_seq stmts;
2922
2923 tree bump = size_binop (MULT_EXPR,
2924 fold_convert (sizetype, DR_STEP (dr)),
2925 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2926 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2927 if (stmts)
2928 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2929
2930 /* The offset given in GS_INFO can have pointer type, so use the element
2931 type of the vector instead. */
2932 tree offset_type = TREE_TYPE (gs_info->offset);
2933 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2934 offset_type = TREE_TYPE (offset_vectype);
2935
2936 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2937 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2938 ssize_int (gs_info->scale));
2939 step = fold_convert (offset_type, step);
2940 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2941
2942 /* Create {0, X, X*2, X*3, ...}. */
2943 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2944 build_zero_cst (offset_type), step);
2945 if (stmts)
2946 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2947}
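
/* An illustrative, standalone sketch (not part of GCC) of the two values
   computed above for a strided access implemented as a gather/scatter: the
   per-copy pointer bump DR_STEP * NUNITS and the invariant offset vector
   { 0, X, 2*X, ... } with X = DR_STEP / SCALE.  The numbers are made up and
   assume DR_STEP is a multiple of SCALE.  */
#include <stdio.h>

int
main (void)
{
  long step = 24, scale = 8, nunits = 4;
  long bump = step * nunits;	/* added to the base after each copy  */
  long x = step / scale;	/* element I of the offset vector is I * X  */

  printf ("dataref bump = %ld\noffsets =", bump);
  for (long i = 0; i < nunits; ++i)
    printf (" %ld", i * x);	/* prints 0 3 6 9  */
  printf ("\n");
  return 0;
}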
2948
2949/* Return the amount that should be added to a vector pointer to move
2950 to the next or previous copy of AGGR_TYPE. DR is the data reference
2951 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2952 vectorization. */
2953
2954static tree
2955vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2956 vect_memory_access_type memory_access_type)
2957{
2958 if (memory_access_type == VMAT_INVARIANT)
2959 return size_zero_node;
2960
2961 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2962 tree step = vect_dr_behavior (dr)->step;
2963 if (tree_int_cst_sgn (step) == -1)
2964 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2965 return iv_step;
2966}
2967
37b14185
RB
2968/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2969
2970static bool
2971vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2972 gimple **vec_stmt, slp_tree slp_node,
68435eb2
RB
2973 tree vectype_in, enum vect_def_type *dt,
2974 stmt_vector_for_cost *cost_vec)
37b14185
RB
2975{
2976 tree op, vectype;
2977 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2978 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2979 unsigned ncopies;
2980 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2981
2982 op = gimple_call_arg (stmt, 0);
2983 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2984
2985 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2986 return false;
37b14185
RB
2987
2988 /* Multiple types in SLP are handled by creating the appropriate number of
2989 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2990 case of SLP. */
2991 if (slp_node)
2992 ncopies = 1;
2993 else
e8f142e2 2994 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2995
2996 gcc_assert (ncopies >= 1);
2997
2998 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2999 if (! char_vectype)
3000 return false;
3001
928686b1
RS
3002 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3003 return false;
3004
794e3180 3005 unsigned word_bytes = num_bytes / nunits;
908a1a16 3006
d980067b
RS
3007 /* The encoding uses one stepped pattern for each byte in the word. */
3008 vec_perm_builder elts (num_bytes, word_bytes, 3);
3009 for (unsigned i = 0; i < 3; ++i)
37b14185 3010 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 3011 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 3012
e3342de4
RS
3013 vec_perm_indices indices (elts, 1, num_bytes);
3014 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
3015 return false;
3016
3017 if (! vec_stmt)
3018 {
3019 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3020 DUMP_VECT_SCOPE ("vectorizable_bswap");
78604de0 3021 if (! slp_node)
37b14185 3022 {
68435eb2
RB
3023 record_stmt_cost (cost_vec,
3024 1, vector_stmt, stmt_info, 0, vect_prologue);
3025 record_stmt_cost (cost_vec,
3026 ncopies, vec_perm, stmt_info, 0, vect_body);
37b14185
RB
3027 }
3028 return true;
3029 }
3030
736d0f28 3031 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
3032
3033 /* Transform. */
3034 vec<tree> vec_oprnds = vNULL;
3035 gimple *new_stmt = NULL;
3036 stmt_vec_info prev_stmt_info = NULL;
3037 for (unsigned j = 0; j < ncopies; j++)
3038 {
3039 /* Handle uses. */
3040 if (j == 0)
306b0c92 3041 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
3042 else
3043 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3044
 3045 /* Arguments are ready. Create the new vector stmt. */
3046 unsigned i;
3047 tree vop;
3048 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3049 {
3050 tree tem = make_ssa_name (char_vectype);
3051 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3052 char_vectype, vop));
3053 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3054 tree tem2 = make_ssa_name (char_vectype);
3055 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3056 tem, tem, bswap_vconst);
3057 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3058 tem = make_ssa_name (vectype);
3059 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3060 vectype, tem2));
3061 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3062 if (slp_node)
3063 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3064 }
3065
3066 if (slp_node)
3067 continue;
3068
3069 if (j == 0)
3070 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3071 else
3072 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3073
3074 prev_stmt_info = vinfo_for_stmt (new_stmt);
3075 }
3076
3077 vec_oprnds.release ();
3078 return true;
3079}
3080
b1b6836e
RS
3081/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3082 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3083 in a single step. On success, store the binary pack code in
3084 *CONVERT_CODE. */
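/* For example, narrowing a vector of ints to a vector of shorts succeeds
 when the target provides the corresponding single-step pack operation;
 a narrowing that would need an intermediate type (a multi-step
 conversion) is rejected. */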
3085
3086static bool
3087simple_integer_narrowing (tree vectype_out, tree vectype_in,
3088 tree_code *convert_code)
3089{
3090 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3091 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3092 return false;
3093
3094 tree_code code;
3095 int multi_step_cvt = 0;
3096 auto_vec <tree, 8> interm_types;
3097 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3098 &code, &multi_step_cvt,
3099 &interm_types)
3100 || multi_step_cvt)
3101 return false;
3102
3103 *convert_code = code;
3104 return true;
3105}
5ce9450f 3106
ebfd146a
IR
3107/* Function vectorizable_call.
3108
538dd0b7 3109 Check if GS performs a function call that can be vectorized.
b8698a0f 3110 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
3111 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3112 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3113
3114static bool
355fe088 3115vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 3116 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 3117{
538dd0b7 3118 gcall *stmt;
ebfd146a
IR
3119 tree vec_dest;
3120 tree scalar_dest;
3121 tree op, type;
3122 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3123 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3124 tree vectype_out, vectype_in;
c7bda0f4
RS
3125 poly_uint64 nunits_in;
3126 poly_uint64 nunits_out;
ebfd146a 3127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3128 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3129 vec_info *vinfo = stmt_info->vinfo;
81c40241 3130 tree fndecl, new_temp, rhs_type;
0502fb85
UB
3131 enum vect_def_type dt[3]
3132 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 3133 int ndts = 3;
355fe088 3134 gimple *new_stmt = NULL;
ebfd146a 3135 int ncopies, j;
6e1aa848 3136 vec<tree> vargs = vNULL;
ebfd146a
IR
3137 enum { NARROW, NONE, WIDEN } modifier;
3138 size_t i, nargs;
9d5e7640 3139 tree lhs;
ebfd146a 3140
190c2236 3141 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3142 return false;
3143
66c16fd9
RB
3144 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3145 && ! vec_stmt)
ebfd146a
IR
3146 return false;
3147
538dd0b7
DM
3148 /* Is GS a vectorizable call? */
3149 stmt = dyn_cast <gcall *> (gs);
3150 if (!stmt)
ebfd146a
IR
3151 return false;
3152
5ce9450f 3153 if (gimple_call_internal_p (stmt)
bfaa08b7 3154 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3155 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
c3a8f964
RS
3156 /* Handled by vectorizable_load and vectorizable_store. */
3157 return false;
5ce9450f 3158
0136f8f0
AH
3159 if (gimple_call_lhs (stmt) == NULL_TREE
3160 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
3161 return false;
3162
0136f8f0 3163 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3164
b690cc0f
RG
3165 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3166
ebfd146a
IR
3167 /* Process function arguments. */
3168 rhs_type = NULL_TREE;
b690cc0f 3169 vectype_in = NULL_TREE;
ebfd146a
IR
3170 nargs = gimple_call_num_args (stmt);
3171
1b1562a5
MM
3172 /* Bail out if the function has more than three arguments; we do not have
3173 interesting builtin functions to vectorize with more than two arguments
3174 except for fma. No arguments is also not good. */
3175 if (nargs == 0 || nargs > 3)
ebfd146a
IR
3176 return false;
3177
74bf76ed
JJ
3178 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3179 if (gimple_call_internal_p (stmt)
3180 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3181 {
3182 nargs = 0;
3183 rhs_type = unsigned_type_node;
3184 }
3185
ebfd146a
IR
3186 for (i = 0; i < nargs; i++)
3187 {
b690cc0f
RG
3188 tree opvectype;
3189
ebfd146a
IR
3190 op = gimple_call_arg (stmt, i);
3191
3192 /* We can only handle calls with arguments of the same type. */
3193 if (rhs_type
8533c9d8 3194 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3195 {
73fbfcad 3196 if (dump_enabled_p ())
78c60e3d 3197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3198 "argument types differ.\n");
ebfd146a
IR
3199 return false;
3200 }
b690cc0f
RG
3201 if (!rhs_type)
3202 rhs_type = TREE_TYPE (op);
ebfd146a 3203
894dd753 3204 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
ebfd146a 3205 {
73fbfcad 3206 if (dump_enabled_p ())
78c60e3d 3207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3208 "use not simple.\n");
ebfd146a
IR
3209 return false;
3210 }
ebfd146a 3211
b690cc0f
RG
3212 if (!vectype_in)
3213 vectype_in = opvectype;
3214 else if (opvectype
3215 && opvectype != vectype_in)
3216 {
73fbfcad 3217 if (dump_enabled_p ())
78c60e3d 3218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3219 "argument vector types differ.\n");
b690cc0f
RG
3220 return false;
3221 }
3222 }
3223 /* If all arguments are external or constant defs use a vector type with
3224 the same size as the output vector type. */
ebfd146a 3225 if (!vectype_in)
b690cc0f 3226 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3227 if (vec_stmt)
3228 gcc_assert (vectype_in);
3229 if (!vectype_in)
3230 {
73fbfcad 3231 if (dump_enabled_p ())
7d8930a0 3232 {
78c60e3d
SS
3233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3234 "no vectype for scalar type ");
3235 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3236 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3237 }
3238
3239 return false;
3240 }
ebfd146a
IR
3241
3242 /* FORNOW */
b690cc0f
RG
3243 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3244 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3245 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3246 modifier = NARROW;
c7bda0f4 3247 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3248 modifier = NONE;
c7bda0f4 3249 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3250 modifier = WIDEN;
3251 else
3252 return false;
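 /* For example (illustrative), a call taking vectors of four ints and
 producing vectors of two doubles has nunits_out * 2 == nunits_in and is
 a WIDEN case, while the reverse combination is a NARROW case. */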
3253
70439f0d
RS
3254 /* We only handle functions that do not read or clobber memory. */
3255 if (gimple_vuse (stmt))
3256 {
3257 if (dump_enabled_p ())
3258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3259 "function reads from or writes to memory.\n");
3260 return false;
3261 }
3262
ebfd146a
IR
3263 /* For now, we only vectorize functions if a target specific builtin
3264 is available. TODO -- in some cases, it might be profitable to
3265 insert the calls for pieces of the vector, in order to be able
3266 to vectorize other operations in the loop. */
70439f0d
RS
3267 fndecl = NULL_TREE;
3268 internal_fn ifn = IFN_LAST;
3269 combined_fn cfn = gimple_call_combined_fn (stmt);
3270 tree callee = gimple_call_fndecl (stmt);
3271
3272 /* First try using an internal function. */
b1b6836e
RS
3273 tree_code convert_code = ERROR_MARK;
3274 if (cfn != CFN_LAST
3275 && (modifier == NONE
3276 || (modifier == NARROW
3277 && simple_integer_narrowing (vectype_out, vectype_in,
3278 &convert_code))))
70439f0d
RS
3279 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3280 vectype_in);
3281
3282 /* If that fails, try asking for a target-specific built-in function. */
3283 if (ifn == IFN_LAST)
3284 {
3285 if (cfn != CFN_LAST)
3286 fndecl = targetm.vectorize.builtin_vectorized_function
3287 (cfn, vectype_out, vectype_in);
7672aa9b 3288 else if (callee)
70439f0d
RS
3289 fndecl = targetm.vectorize.builtin_md_vectorized_function
3290 (callee, vectype_out, vectype_in);
3291 }
3292
3293 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3294 {
70439f0d 3295 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3296 && !slp_node
3297 && loop_vinfo
3298 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3299 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3300 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3301 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3302 {
3303 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3304 { 0, 1, 2, ... vf - 1 } vector. */
3305 gcc_assert (nargs == 0);
3306 }
37b14185
RB
3307 else if (modifier == NONE
3308 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3309 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3310 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3311 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
68435eb2 3312 vectype_in, dt, cost_vec);
74bf76ed
JJ
3313 else
3314 {
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3317 "function is not vectorizable.\n");
74bf76ed
JJ
3318 return false;
3319 }
ebfd146a
IR
3320 }
3321
fce57248 3322 if (slp_node)
190c2236 3323 ncopies = 1;
b1b6836e 3324 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3325 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3326 else
e8f142e2 3327 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3328
3329 /* Sanity check: make sure that at least one copy of the vectorized stmt
3330 needs to be generated. */
3331 gcc_assert (ncopies >= 1);
3332
3333 if (!vec_stmt) /* transformation not required. */
3334 {
3335 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3336 DUMP_VECT_SCOPE ("vectorizable_call");
68435eb2
RB
3337 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3338 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3339 record_stmt_cost (cost_vec, ncopies / 2,
3340 vec_promote_demote, stmt_info, 0, vect_body);
b1b6836e 3341
ebfd146a
IR
3342 return true;
3343 }
3344
67b8dbac 3345 /* Transform. */
ebfd146a 3346
73fbfcad 3347 if (dump_enabled_p ())
e645e942 3348 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3349
3350 /* Handle def. */
3351 scalar_dest = gimple_call_lhs (stmt);
3352 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3353
3354 prev_stmt_info = NULL;
b1b6836e 3355 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3356 {
b1b6836e 3357 tree prev_res = NULL_TREE;
ebfd146a
IR
3358 for (j = 0; j < ncopies; ++j)
3359 {
3360 /* Build argument list for the vectorized call. */
3361 if (j == 0)
9771b263 3362 vargs.create (nargs);
ebfd146a 3363 else
9771b263 3364 vargs.truncate (0);
ebfd146a 3365
190c2236
JJ
3366 if (slp_node)
3367 {
ef062b13 3368 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3369 vec<tree> vec_oprnds0;
190c2236
JJ
3370
3371 for (i = 0; i < nargs; i++)
9771b263 3372 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3373 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3374 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3375
3376 /* Arguments are ready. Create the new vector stmt. */
9771b263 3377 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3378 {
3379 size_t k;
3380 for (k = 0; k < nargs; k++)
3381 {
37b5ec8f 3382 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3383 vargs[k] = vec_oprndsk[i];
190c2236 3384 }
b1b6836e
RS
3385 if (modifier == NARROW)
3386 {
3387 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3388 gcall *call
3389 = gimple_build_call_internal_vec (ifn, vargs);
3390 gimple_call_set_lhs (call, half_res);
3391 gimple_call_set_nothrow (call, true);
3392 new_stmt = call;
b1b6836e
RS
3393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3394 if ((i & 1) == 0)
3395 {
3396 prev_res = half_res;
3397 continue;
3398 }
3399 new_temp = make_ssa_name (vec_dest);
3400 new_stmt = gimple_build_assign (new_temp, convert_code,
3401 prev_res, half_res);
3402 }
70439f0d 3403 else
b1b6836e 3404 {
a844293d 3405 gcall *call;
b1b6836e 3406 if (ifn != IFN_LAST)
a844293d 3407 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3408 else
a844293d
RS
3409 call = gimple_build_call_vec (fndecl, vargs);
3410 new_temp = make_ssa_name (vec_dest, call);
3411 gimple_call_set_lhs (call, new_temp);
3412 gimple_call_set_nothrow (call, true);
3413 new_stmt = call;
b1b6836e 3414 }
190c2236 3415 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3416 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3417 }
3418
3419 for (i = 0; i < nargs; i++)
3420 {
37b5ec8f 3421 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3422 vec_oprndsi.release ();
190c2236 3423 }
190c2236
JJ
3424 continue;
3425 }
3426
ebfd146a
IR
3427 for (i = 0; i < nargs; i++)
3428 {
3429 op = gimple_call_arg (stmt, i);
3430 if (j == 0)
3431 vec_oprnd0
81c40241 3432 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3433 else
63827fb8
IR
3434 {
3435 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3436 vec_oprnd0
3437 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3438 }
ebfd146a 3439
9771b263 3440 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
3441 }
3442
74bf76ed
JJ
3443 if (gimple_call_internal_p (stmt)
3444 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3445 {
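 /* Copy number J gets the constant vector { J * nunits_out,
 J * nunits_out + 1, ... }, so the ncopies copies together enumerate
 the lanes 0 .. vf - 1. */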
c7bda0f4 3446 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3447 tree new_var
0e22bb5a 3448 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3449 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3450 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3451 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3452 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3453 }
b1b6836e
RS
3454 else if (modifier == NARROW)
3455 {
3456 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3457 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3458 gimple_call_set_lhs (call, half_res);
3459 gimple_call_set_nothrow (call, true);
3460 new_stmt = call;
b1b6836e
RS
3461 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3462 if ((j & 1) == 0)
3463 {
3464 prev_res = half_res;
3465 continue;
3466 }
3467 new_temp = make_ssa_name (vec_dest);
3468 new_stmt = gimple_build_assign (new_temp, convert_code,
3469 prev_res, half_res);
3470 }
74bf76ed
JJ
3471 else
3472 {
a844293d 3473 gcall *call;
70439f0d 3474 if (ifn != IFN_LAST)
a844293d 3475 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3476 else
a844293d 3477 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3478 new_temp = make_ssa_name (vec_dest, call);
a844293d
RS
3479 gimple_call_set_lhs (call, new_temp);
3480 gimple_call_set_nothrow (call, true);
3481 new_stmt = call;
74bf76ed 3482 }
ebfd146a
IR
3483 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3484
b1b6836e 3485 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3486 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3487 else
3488 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3489
3490 prev_stmt_info = vinfo_for_stmt (new_stmt);
3491 }
b1b6836e
RS
3492 }
3493 else if (modifier == NARROW)
3494 {
ebfd146a
IR
3495 for (j = 0; j < ncopies; ++j)
3496 {
3497 /* Build argument list for the vectorized call. */
3498 if (j == 0)
9771b263 3499 vargs.create (nargs * 2);
ebfd146a 3500 else
9771b263 3501 vargs.truncate (0);
ebfd146a 3502
190c2236
JJ
3503 if (slp_node)
3504 {
ef062b13 3505 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3506 vec<tree> vec_oprnds0;
190c2236
JJ
3507
3508 for (i = 0; i < nargs; i++)
9771b263 3509 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3510 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3511 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3512
3513 /* Arguments are ready. Create the new vector stmt. */
9771b263 3514 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3515 {
3516 size_t k;
9771b263 3517 vargs.truncate (0);
190c2236
JJ
3518 for (k = 0; k < nargs; k++)
3519 {
37b5ec8f 3520 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3521 vargs.quick_push (vec_oprndsk[i]);
3522 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3523 }
a844293d 3524 gcall *call;
70439f0d 3525 if (ifn != IFN_LAST)
a844293d 3526 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3527 else
a844293d
RS
3528 call = gimple_build_call_vec (fndecl, vargs);
3529 new_temp = make_ssa_name (vec_dest, call);
3530 gimple_call_set_lhs (call, new_temp);
3531 gimple_call_set_nothrow (call, true);
3532 new_stmt = call;
190c2236 3533 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3534 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3535 }
3536
3537 for (i = 0; i < nargs; i++)
3538 {
37b5ec8f 3539 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3540 vec_oprndsi.release ();
190c2236 3541 }
190c2236
JJ
3542 continue;
3543 }
3544
ebfd146a
IR
3545 for (i = 0; i < nargs; i++)
3546 {
3547 op = gimple_call_arg (stmt, i);
3548 if (j == 0)
3549 {
3550 vec_oprnd0
81c40241 3551 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3552 vec_oprnd1
63827fb8 3553 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3554 }
3555 else
3556 {
336ecb65 3557 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3558 vec_oprnd0
63827fb8 3559 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3560 vec_oprnd1
63827fb8 3561 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3562 }
3563
9771b263
DN
3564 vargs.quick_push (vec_oprnd0);
3565 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3566 }
3567
b1b6836e 3568 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3569 new_temp = make_ssa_name (vec_dest, new_stmt);
3570 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3571 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3572
3573 if (j == 0)
3574 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3575 else
3576 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3577
3578 prev_stmt_info = vinfo_for_stmt (new_stmt);
3579 }
3580
3581 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3582 }
b1b6836e
RS
3583 else
3584 /* No current target implements this case. */
3585 return false;
ebfd146a 3586
9771b263 3587 vargs.release ();
ebfd146a 3588
ebfd146a
IR
3589 /* The call in STMT might prevent it from being removed in dce.
3590 We however cannot remove it here, due to the way the ssa name
3591 it defines is mapped to the new definition. So just replace
3592 rhs of the statement with something harmless. */
3593
dd34c087
JJ
3594 if (slp_node)
3595 return true;
3596
ebfd146a 3597 type = TREE_TYPE (scalar_dest);
9d5e7640 3598 if (is_pattern_stmt_p (stmt_info))
ed7b8123
RS
3599 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3600 lhs = gimple_get_lhs (stmt_info->stmt);
3cc2fa2a 3601
9d5e7640 3602 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3603 set_vinfo_for_stmt (new_stmt, stmt_info);
ed7b8123 3604 set_vinfo_for_stmt (stmt_info->stmt, NULL);
ebfd146a
IR
3605 STMT_VINFO_STMT (stmt_info) = new_stmt;
3606 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3607
3608 return true;
3609}
3610
3611
0136f8f0
AH
3612struct simd_call_arg_info
3613{
3614 tree vectype;
3615 tree op;
0136f8f0 3616 HOST_WIDE_INT linear_step;
34e82342 3617 enum vect_def_type dt;
0136f8f0 3618 unsigned int align;
17b658af 3619 bool simd_lane_linear;
0136f8f0
AH
3620};
3621
17b658af
JJ
3622/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3623 is linear within simd lane (but not within whole loop), note it in
3624 *ARGINFO. */
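/* Roughly, an address built as
 _1 = (sizetype) simd_lane_; _2 = _1 * 4; _3 = _2 + 16; op_ = base_ p+ _3;
 where simd_lane_ is the result of IFN_GOMP_SIMD_LANE on LOOP's simduid
 and base_ is invariant, is recorded as ARGINFO->op = base_ p+ 16 with
 ARGINFO->linear_step = 4. */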
3625
3626static void
3627vect_simd_lane_linear (tree op, struct loop *loop,
3628 struct simd_call_arg_info *arginfo)
3629{
355fe088 3630 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3631
3632 if (!is_gimple_assign (def_stmt)
3633 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3634 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3635 return;
3636
3637 tree base = gimple_assign_rhs1 (def_stmt);
3638 HOST_WIDE_INT linear_step = 0;
3639 tree v = gimple_assign_rhs2 (def_stmt);
3640 while (TREE_CODE (v) == SSA_NAME)
3641 {
3642 tree t;
3643 def_stmt = SSA_NAME_DEF_STMT (v);
3644 if (is_gimple_assign (def_stmt))
3645 switch (gimple_assign_rhs_code (def_stmt))
3646 {
3647 case PLUS_EXPR:
3648 t = gimple_assign_rhs2 (def_stmt);
3649 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3650 return;
3651 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3652 v = gimple_assign_rhs1 (def_stmt);
3653 continue;
3654 case MULT_EXPR:
3655 t = gimple_assign_rhs2 (def_stmt);
3656 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3657 return;
3658 linear_step = tree_to_shwi (t);
3659 v = gimple_assign_rhs1 (def_stmt);
3660 continue;
3661 CASE_CONVERT:
3662 t = gimple_assign_rhs1 (def_stmt);
3663 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3664 || (TYPE_PRECISION (TREE_TYPE (v))
3665 < TYPE_PRECISION (TREE_TYPE (t))))
3666 return;
3667 if (!linear_step)
3668 linear_step = 1;
3669 v = t;
3670 continue;
3671 default:
3672 return;
3673 }
8e4284d0 3674 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3675 && loop->simduid
3676 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3677 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3678 == loop->simduid))
3679 {
3680 if (!linear_step)
3681 linear_step = 1;
3682 arginfo->linear_step = linear_step;
3683 arginfo->op = base;
3684 arginfo->simd_lane_linear = true;
3685 return;
3686 }
3687 }
3688}
3689
cf1b2ba4
RS
3690/* Return the number of elements in vector type VECTYPE, which is associated
3691 with a SIMD clone. At present these vectors always have a constant
3692 length. */
3693
3694static unsigned HOST_WIDE_INT
3695simd_clone_subparts (tree vectype)
3696{
928686b1 3697 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3698}
3699
0136f8f0
AH
3700/* Function vectorizable_simd_clone_call.
3701
3702 Check if STMT performs a function call that can be vectorized
3703 by calling a simd clone of the function.
3704 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3705 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3706 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3707
3708static bool
355fe088 3709vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
3710 gimple **vec_stmt, slp_tree slp_node,
3711 stmt_vector_for_cost *)
0136f8f0
AH
3712{
3713 tree vec_dest;
3714 tree scalar_dest;
3715 tree op, type;
3716 tree vec_oprnd0 = NULL_TREE;
3717 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3718 tree vectype;
3719 unsigned int nunits;
3720 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3721 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3722 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3723 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3724 tree fndecl, new_temp;
355fe088 3725 gimple *new_stmt = NULL;
0136f8f0 3726 int ncopies, j;
00426f9a 3727 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3728 vec<tree> vargs = vNULL;
3729 size_t i, nargs;
3730 tree lhs, rtype, ratype;
e7a74006 3731 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3732
3733 /* Is STMT a vectorizable call? */
3734 if (!is_gimple_call (stmt))
3735 return false;
3736
3737 fndecl = gimple_call_fndecl (stmt);
3738 if (fndecl == NULL_TREE)
3739 return false;
3740
d52f5295 3741 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3742 if (node == NULL || node->simd_clones == NULL)
3743 return false;
3744
3745 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3746 return false;
3747
66c16fd9
RB
3748 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3749 && ! vec_stmt)
0136f8f0
AH
3750 return false;
3751
3752 if (gimple_call_lhs (stmt)
3753 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3754 return false;
3755
3756 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3757
3758 vectype = STMT_VINFO_VECTYPE (stmt_info);
3759
3760 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3761 return false;
3762
3763 /* FORNOW */
fce57248 3764 if (slp_node)
0136f8f0
AH
3765 return false;
3766
3767 /* Process function arguments. */
3768 nargs = gimple_call_num_args (stmt);
3769
3770 /* Bail out if the function has zero arguments. */
3771 if (nargs == 0)
3772 return false;
3773
00426f9a 3774 arginfo.reserve (nargs, true);
0136f8f0
AH
3775
3776 for (i = 0; i < nargs; i++)
3777 {
3778 simd_call_arg_info thisarginfo;
3779 affine_iv iv;
3780
3781 thisarginfo.linear_step = 0;
3782 thisarginfo.align = 0;
3783 thisarginfo.op = NULL_TREE;
17b658af 3784 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3785
3786 op = gimple_call_arg (stmt, i);
894dd753 3787 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
81c40241 3788 &thisarginfo.vectype)
0136f8f0
AH
3789 || thisarginfo.dt == vect_uninitialized_def)
3790 {
3791 if (dump_enabled_p ())
3792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3793 "use not simple.\n");
0136f8f0
AH
3794 return false;
3795 }
3796
3797 if (thisarginfo.dt == vect_constant_def
3798 || thisarginfo.dt == vect_external_def)
3799 gcc_assert (thisarginfo.vectype == NULL_TREE);
3800 else
3801 gcc_assert (thisarginfo.vectype != NULL_TREE);
3802
6c9e85fb
JJ
3803 /* For linear arguments, the analyze phase should have saved
3804 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3805 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3806 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3807 {
3808 gcc_assert (vec_stmt);
3809 thisarginfo.linear_step
17b658af 3810 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3811 thisarginfo.op
17b658af
JJ
3812 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3813 thisarginfo.simd_lane_linear
3814 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3815 == boolean_true_node);
6c9e85fb
JJ
3816 /* If loop has been peeled for alignment, we need to adjust it. */
3817 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3818 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3819 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3820 {
3821 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3822 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3823 tree opt = TREE_TYPE (thisarginfo.op);
3824 bias = fold_convert (TREE_TYPE (step), bias);
3825 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3826 thisarginfo.op
3827 = fold_build2 (POINTER_TYPE_P (opt)
3828 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3829 thisarginfo.op, bias);
3830 }
3831 }
3832 else if (!vec_stmt
3833 && thisarginfo.dt != vect_constant_def
3834 && thisarginfo.dt != vect_external_def
3835 && loop_vinfo
3836 && TREE_CODE (op) == SSA_NAME
3837 && simple_iv (loop, loop_containing_stmt (stmt), op,
3838 &iv, false)
3839 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3840 {
3841 thisarginfo.linear_step = tree_to_shwi (iv.step);
3842 thisarginfo.op = iv.base;
3843 }
3844 else if ((thisarginfo.dt == vect_constant_def
3845 || thisarginfo.dt == vect_external_def)
3846 && POINTER_TYPE_P (TREE_TYPE (op)))
3847 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3848 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3849 linear too. */
3850 if (POINTER_TYPE_P (TREE_TYPE (op))
3851 && !thisarginfo.linear_step
3852 && !vec_stmt
3853 && thisarginfo.dt != vect_constant_def
3854 && thisarginfo.dt != vect_external_def
3855 && loop_vinfo
3856 && !slp_node
3857 && TREE_CODE (op) == SSA_NAME)
3858 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3859
3860 arginfo.quick_push (thisarginfo);
3861 }
3862
d9f21f6a
RS
3863 unsigned HOST_WIDE_INT vf;
3864 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3865 {
3866 if (dump_enabled_p ())
3867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3868 "not considering SIMD clones; not yet supported"
3869 " for variable-width vectors.\n");
3870 return false;
3871 }
3872
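 /* Scan the available SIMD clones and pick the best match: each candidate
 accumulates a badness score (1024 for every halving of its simdlen
 below the vectorization factor, 2048 if it is an in-branch clone, 512
 per unit of target-reported badness, 64 for each vector argument that
 would have to be built from a uniform or linear value, plus a small
 alignment term), and the clone with the lowest score is chosen. */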
0136f8f0
AH
3873 unsigned int badness = 0;
3874 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3875 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3876 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3877 else
3878 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3879 n = n->simdclone->next_clone)
3880 {
3881 unsigned int this_badness = 0;
d9f21f6a 3882 if (n->simdclone->simdlen > vf
0136f8f0
AH
3883 || n->simdclone->nargs != nargs)
3884 continue;
d9f21f6a
RS
3885 if (n->simdclone->simdlen < vf)
3886 this_badness += (exact_log2 (vf)
0136f8f0
AH
3887 - exact_log2 (n->simdclone->simdlen)) * 1024;
3888 if (n->simdclone->inbranch)
3889 this_badness += 2048;
3890 int target_badness = targetm.simd_clone.usable (n);
3891 if (target_badness < 0)
3892 continue;
3893 this_badness += target_badness * 512;
3894 /* FORNOW: Have to add code to add the mask argument. */
3895 if (n->simdclone->inbranch)
3896 continue;
3897 for (i = 0; i < nargs; i++)
3898 {
3899 switch (n->simdclone->args[i].arg_type)
3900 {
3901 case SIMD_CLONE_ARG_TYPE_VECTOR:
3902 if (!useless_type_conversion_p
3903 (n->simdclone->args[i].orig_type,
3904 TREE_TYPE (gimple_call_arg (stmt, i))))
3905 i = -1;
3906 else if (arginfo[i].dt == vect_constant_def
3907 || arginfo[i].dt == vect_external_def
3908 || arginfo[i].linear_step)
3909 this_badness += 64;
3910 break;
3911 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3912 if (arginfo[i].dt != vect_constant_def
3913 && arginfo[i].dt != vect_external_def)
3914 i = -1;
3915 break;
3916 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3917 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3918 if (arginfo[i].dt == vect_constant_def
3919 || arginfo[i].dt == vect_external_def
3920 || (arginfo[i].linear_step
3921 != n->simdclone->args[i].linear_step))
3922 i = -1;
3923 break;
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3928 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3929 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3930 /* FORNOW */
3931 i = -1;
3932 break;
3933 case SIMD_CLONE_ARG_TYPE_MASK:
3934 gcc_unreachable ();
3935 }
3936 if (i == (size_t) -1)
3937 break;
3938 if (n->simdclone->args[i].alignment > arginfo[i].align)
3939 {
3940 i = -1;
3941 break;
3942 }
3943 if (arginfo[i].align)
3944 this_badness += (exact_log2 (arginfo[i].align)
3945 - exact_log2 (n->simdclone->args[i].alignment));
3946 }
3947 if (i == (size_t) -1)
3948 continue;
3949 if (bestn == NULL || this_badness < badness)
3950 {
3951 bestn = n;
3952 badness = this_badness;
3953 }
3954 }
3955
3956 if (bestn == NULL)
00426f9a 3957 return false;
0136f8f0
AH
3958
3959 for (i = 0; i < nargs; i++)
3960 if ((arginfo[i].dt == vect_constant_def
3961 || arginfo[i].dt == vect_external_def)
3962 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3963 {
3964 arginfo[i].vectype
3965 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3966 i)));
3967 if (arginfo[i].vectype == NULL
cf1b2ba4 3968 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3969 > bestn->simdclone->simdlen))
00426f9a 3970 return false;
0136f8f0
AH
3971 }
3972
3973 fndecl = bestn->decl;
3974 nunits = bestn->simdclone->simdlen;
d9f21f6a 3975 ncopies = vf / nunits;
0136f8f0
AH
3976
3977 /* If the function isn't const, only allow it in simd loops where the user
3978 has asserted that at least nunits consecutive iterations can be
3979 performed using SIMD instructions. */
3980 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3981 && gimple_vuse (stmt))
00426f9a 3982 return false;
0136f8f0
AH
3983
3984 /* Sanity check: make sure that at least one copy of the vectorized stmt
3985 needs to be generated. */
3986 gcc_assert (ncopies >= 1);
3987
3988 if (!vec_stmt) /* transformation not required. */
3989 {
6c9e85fb
JJ
3990 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3991 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3992 if ((bestn->simdclone->args[i].arg_type
3993 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3994 || (bestn->simdclone->args[i].arg_type
3995 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3996 {
17b658af 3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3998 + 1);
3999 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4000 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4001 ? size_type_node : TREE_TYPE (arginfo[i].op);
4002 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4003 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
4004 tree sll = arginfo[i].simd_lane_linear
4005 ? boolean_true_node : boolean_false_node;
4006 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 4007 }
0136f8f0 4008 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
adac3a68 4009 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
68435eb2 4010/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
0136f8f0
AH
4011 return true;
4012 }
4013
67b8dbac 4014 /* Transform. */
0136f8f0
AH
4015
4016 if (dump_enabled_p ())
4017 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4018
4019 /* Handle def. */
4020 scalar_dest = gimple_call_lhs (stmt);
4021 vec_dest = NULL_TREE;
4022 rtype = NULL_TREE;
4023 ratype = NULL_TREE;
4024 if (scalar_dest)
4025 {
4026 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4027 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4028 if (TREE_CODE (rtype) == ARRAY_TYPE)
4029 {
4030 ratype = rtype;
4031 rtype = TREE_TYPE (ratype);
4032 }
4033 }
4034
4035 prev_stmt_info = NULL;
4036 for (j = 0; j < ncopies; ++j)
4037 {
4038 /* Build argument list for the vectorized call. */
4039 if (j == 0)
4040 vargs.create (nargs);
4041 else
4042 vargs.truncate (0);
4043
4044 for (i = 0; i < nargs; i++)
4045 {
4046 unsigned int k, l, m, o;
4047 tree atype;
4048 op = gimple_call_arg (stmt, i);
4049 switch (bestn->simdclone->args[i].arg_type)
4050 {
4051 case SIMD_CLONE_ARG_TYPE_VECTOR:
4052 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 4053 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
4054 for (m = j * o; m < (j + 1) * o; m++)
4055 {
cf1b2ba4
RS
4056 if (simd_clone_subparts (atype)
4057 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 4058 {
73a699ae 4059 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
4060 k = (simd_clone_subparts (arginfo[i].vectype)
4061 / simd_clone_subparts (atype));
0136f8f0
AH
4062 gcc_assert ((k & (k - 1)) == 0);
4063 if (m == 0)
4064 vec_oprnd0
81c40241 4065 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4066 else
4067 {
4068 vec_oprnd0 = arginfo[i].op;
4069 if ((m & (k - 1)) == 0)
4070 vec_oprnd0
4071 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4072 vec_oprnd0);
4073 }
4074 arginfo[i].op = vec_oprnd0;
4075 vec_oprnd0
4076 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 4077 bitsize_int (prec),
0136f8f0
AH
4078 bitsize_int ((m & (k - 1)) * prec));
4079 new_stmt
b731b390 4080 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4081 vec_oprnd0);
4082 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4083 vargs.safe_push (gimple_assign_lhs (new_stmt));
4084 }
4085 else
4086 {
cf1b2ba4
RS
4087 k = (simd_clone_subparts (atype)
4088 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
4089 gcc_assert ((k & (k - 1)) == 0);
4090 vec<constructor_elt, va_gc> *ctor_elts;
4091 if (k != 1)
4092 vec_alloc (ctor_elts, k);
4093 else
4094 ctor_elts = NULL;
4095 for (l = 0; l < k; l++)
4096 {
4097 if (m == 0 && l == 0)
4098 vec_oprnd0
81c40241 4099 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4100 else
4101 vec_oprnd0
4102 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4103 arginfo[i].op);
4104 arginfo[i].op = vec_oprnd0;
4105 if (k == 1)
4106 break;
4107 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4108 vec_oprnd0);
4109 }
4110 if (k == 1)
4111 vargs.safe_push (vec_oprnd0);
4112 else
4113 {
4114 vec_oprnd0 = build_constructor (atype, ctor_elts);
4115 new_stmt
b731b390 4116 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4117 vec_oprnd0);
4118 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4119 vargs.safe_push (gimple_assign_lhs (new_stmt));
4120 }
4121 }
4122 }
4123 break;
4124 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4125 vargs.safe_push (op);
4126 break;
4127 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4128 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4129 if (j == 0)
4130 {
4131 gimple_seq stmts;
4132 arginfo[i].op
4133 = force_gimple_operand (arginfo[i].op, &stmts, true,
4134 NULL_TREE);
4135 if (stmts != NULL)
4136 {
4137 basic_block new_bb;
4138 edge pe = loop_preheader_edge (loop);
4139 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4140 gcc_assert (!new_bb);
4141 }
17b658af
JJ
4142 if (arginfo[i].simd_lane_linear)
4143 {
4144 vargs.safe_push (arginfo[i].op);
4145 break;
4146 }
b731b390 4147 tree phi_res = copy_ssa_name (op);
538dd0b7 4148 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 4149 set_vinfo_for_stmt (new_phi,
310213d4 4150 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
4151 add_phi_arg (new_phi, arginfo[i].op,
4152 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4153 enum tree_code code
4154 = POINTER_TYPE_P (TREE_TYPE (op))
4155 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4156 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4157 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4158 widest_int cst
4159 = wi::mul (bestn->simdclone->args[i].linear_step,
4160 ncopies * nunits);
4161 tree tcst = wide_int_to_tree (type, cst);
b731b390 4162 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
4163 new_stmt
4164 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4165 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4166 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4167 set_vinfo_for_stmt (new_stmt,
310213d4 4168 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
4169 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4170 UNKNOWN_LOCATION);
4171 arginfo[i].op = phi_res;
4172 vargs.safe_push (phi_res);
4173 }
4174 else
4175 {
4176 enum tree_code code
4177 = POINTER_TYPE_P (TREE_TYPE (op))
4178 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4179 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4180 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4181 widest_int cst
4182 = wi::mul (bestn->simdclone->args[i].linear_step,
4183 j * nunits);
4184 tree tcst = wide_int_to_tree (type, cst);
b731b390 4185 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
4186 new_stmt = gimple_build_assign (new_temp, code,
4187 arginfo[i].op, tcst);
0136f8f0
AH
4188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189 vargs.safe_push (new_temp);
4190 }
4191 break;
7adb26f2
JJ
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4194 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4198 default:
4199 gcc_unreachable ();
4200 }
4201 }
4202
4203 new_stmt = gimple_build_call_vec (fndecl, vargs);
4204 if (vec_dest)
4205 {
cf1b2ba4 4206 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4207 if (ratype)
b731b390 4208 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4209 else if (simd_clone_subparts (vectype)
4210 == simd_clone_subparts (rtype))
0136f8f0
AH
4211 new_temp = make_ssa_name (vec_dest, new_stmt);
4212 else
4213 new_temp = make_ssa_name (rtype, new_stmt);
4214 gimple_call_set_lhs (new_stmt, new_temp);
4215 }
4216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4217
4218 if (vec_dest)
4219 {
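 /* If the clone computes more elements than VECTYPE holds, the single
 call result is split into several vector stmts below; e.g. a clone
 with simdlen 8 returning int while VECTYPE holds four ints gives
 k == 2 pieces per call. */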
cf1b2ba4 4220 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4221 {
4222 unsigned int k, l;
73a699ae
RS
4223 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4224 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4225 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4226 gcc_assert ((k & (k - 1)) == 0);
4227 for (l = 0; l < k; l++)
4228 {
4229 tree t;
4230 if (ratype)
4231 {
4232 t = build_fold_addr_expr (new_temp);
4233 t = build2 (MEM_REF, vectype, t,
73a699ae 4234 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4235 }
4236 else
4237 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4238 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 4239 new_stmt
b731b390 4240 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
4241 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4242 if (j == 0 && l == 0)
4243 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4244 else
4245 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4246
4247 prev_stmt_info = vinfo_for_stmt (new_stmt);
4248 }
4249
4250 if (ratype)
3ba4ff41 4251 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4252 continue;
4253 }
cf1b2ba4 4254 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4255 {
cf1b2ba4
RS
4256 unsigned int k = (simd_clone_subparts (vectype)
4257 / simd_clone_subparts (rtype));
0136f8f0
AH
4258 gcc_assert ((k & (k - 1)) == 0);
4259 if ((j & (k - 1)) == 0)
4260 vec_alloc (ret_ctor_elts, k);
4261 if (ratype)
4262 {
cf1b2ba4 4263 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4264 for (m = 0; m < o; m++)
4265 {
4266 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4267 size_int (m), NULL_TREE, NULL_TREE);
4268 new_stmt
b731b390 4269 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
4270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4271 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4272 gimple_assign_lhs (new_stmt));
4273 }
3ba4ff41 4274 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4275 }
4276 else
4277 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4278 if ((j & (k - 1)) != k - 1)
4279 continue;
4280 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4281 new_stmt
b731b390 4282 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
4283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4284
4285 if ((unsigned) j == k - 1)
4286 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4287 else
4288 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4289
4290 prev_stmt_info = vinfo_for_stmt (new_stmt);
4291 continue;
4292 }
4293 else if (ratype)
4294 {
4295 tree t = build_fold_addr_expr (new_temp);
4296 t = build2 (MEM_REF, vectype, t,
4297 build_int_cst (TREE_TYPE (t), 0));
4298 new_stmt
b731b390 4299 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0 4300 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41 4301 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4302 }
4303 }
4304
4305 if (j == 0)
4306 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4307 else
4308 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4309
4310 prev_stmt_info = vinfo_for_stmt (new_stmt);
4311 }
4312
4313 vargs.release ();
4314
4315 /* The call in STMT might prevent it from being removed in dce.
4316 We however cannot remove it here, due to the way the ssa name
4317 it defines is mapped to the new definition. So just replace
4318 rhs of the statement with something harmless. */
4319
4320 if (slp_node)
4321 return true;
4322
4323 if (scalar_dest)
4324 {
4325 type = TREE_TYPE (scalar_dest);
4326 if (is_pattern_stmt_p (stmt_info))
4327 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4328 else
4329 lhs = gimple_call_lhs (stmt);
4330 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4331 }
4332 else
4333 new_stmt = gimple_build_nop ();
4334 set_vinfo_for_stmt (new_stmt, stmt_info);
4335 set_vinfo_for_stmt (stmt, NULL);
4336 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4337 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4338 unlink_stmt_vdef (stmt);
4339
4340 return true;
4341}
4342
4343
ebfd146a
IR
4344/* Function vect_gen_widened_results_half
4345
4346 Create a vector stmt whose code, number of arguments, and result
b8698a0f 4347 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 4348 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
4349 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4350 needs to be created (DECL is a function-decl of a target-builtin).
4351 STMT is the original scalar stmt that we are vectorizing. */
4352
355fe088 4353static gimple *
ebfd146a
IR
4354vect_gen_widened_results_half (enum tree_code code,
4355 tree decl,
4356 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4357 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4358 gimple *stmt)
b8698a0f 4359{
355fe088 4360 gimple *new_stmt;
b8698a0f
L
4361 tree new_temp;
4362
4363 /* Generate half of the widened result: */
4364 if (code == CALL_EXPR)
4365 {
4366 /* Target specific support */
ebfd146a
IR
4367 if (op_type == binary_op)
4368 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4369 else
4370 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4371 new_temp = make_ssa_name (vec_dest, new_stmt);
4372 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4373 }
4374 else
ebfd146a 4375 {
b8698a0f
L
4376 /* Generic support */
4377 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4378 if (op_type != binary_op)
4379 vec_oprnd1 = NULL;
0d0e4a03 4380 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4381 new_temp = make_ssa_name (vec_dest, new_stmt);
4382 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4383 }
ebfd146a
IR
4384 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4385
ebfd146a
IR
4386 return new_stmt;
4387}
4388
4a00c761
JJ
4389
4390/* Get vectorized definitions for loop-based vectorization. For the first
4391 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4392 the scalar operand), and for the rest we get a copy with
4393 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4394 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4395 The vectors are collected into VEC_OPRNDS. */
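/* Each invocation pushes two vector defs and then recurses MULTI_STEP_CVT
 more times, so 2 * (MULTI_STEP_CVT + 1) defs are collected in total. */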
4396
4397static void
355fe088 4398vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4399 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4400{
4401 tree vec_oprnd;
4402
4403 /* Get first vector operand. */
4404 /* All the vector operands except the very first one (that is scalar oprnd)
4405 are stmt copies. */
4406 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4407 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4408 else
4409 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4410
9771b263 4411 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4412
4413 /* Get second vector operand. */
4414 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4415 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4416
4417 *oprnd = vec_oprnd;
4418
4419 /* For conversion in multiple steps, continue to get operands
4420 recursively. */
4421 if (multi_step_cvt)
4422 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4423}
4424
4425
4426/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4427 For multi-step conversions store the resulting vectors and call the function
4428 recursively. */
4429
4430static void
9771b263 4431vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4432 int multi_step_cvt, gimple *stmt,
9771b263 4433 vec<tree> vec_dsts,
4a00c761
JJ
4434 gimple_stmt_iterator *gsi,
4435 slp_tree slp_node, enum tree_code code,
4436 stmt_vec_info *prev_stmt_info)
4437{
4438 unsigned int i;
4439 tree vop0, vop1, new_tmp, vec_dest;
355fe088 4440 gimple *new_stmt;
4a00c761
JJ
4441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4442
9771b263 4443 vec_dest = vec_dsts.pop ();
4a00c761 4444
9771b263 4445 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4446 {
4447 /* Create demotion operation. */
9771b263
DN
4448 vop0 = (*vec_oprnds)[i];
4449 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4450 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4451 new_tmp = make_ssa_name (vec_dest, new_stmt);
4452 gimple_assign_set_lhs (new_stmt, new_tmp);
4453 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4454
4455 if (multi_step_cvt)
4456 /* Store the resulting vector for next recursive call. */
9771b263 4457 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4458 else
4459 {
4460 /* This is the last step of the conversion sequence. Store the
4461 vectors in SLP_NODE or in vector info of the scalar statement
4462 (or in STMT_VINFO_RELATED_STMT chain). */
4463 if (slp_node)
9771b263 4464 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4465 else
c689ce1e
RB
4466 {
4467 if (!*prev_stmt_info)
4468 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4469 else
4470 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4471
c689ce1e
RB
4472 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4473 }
4a00c761
JJ
4474 }
4475 }
4476
4477 /* For multi-step demotion operations we first generate demotion operations
4478 from the source type to the intermediate types, and then combine the
4479 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4480 type. */
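 /* For example, a two-step demotion from ints to chars first packs pairs
 of int vectors into short vectors here, and the recursive call then
 packs pairs of those into char vectors with VEC_PACK_TRUNC_EXPR. */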
4481 if (multi_step_cvt)
4482 {
4483 /* At each level of recursion we have half of the operands we had at the
4484 previous level. */
9771b263 4485 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4486 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4487 stmt, vec_dsts, gsi, slp_node,
4488 VEC_PACK_TRUNC_EXPR,
4489 prev_stmt_info);
4490 }
4491
9771b263 4492 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4493}
4494
4495
4496/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4497 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4498 the resulting vectors and call the function recursively. */
4499
4500static void
9771b263
DN
4501vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4502 vec<tree> *vec_oprnds1,
355fe088 4503 gimple *stmt, tree vec_dest,
4a00c761
JJ
4504 gimple_stmt_iterator *gsi,
4505 enum tree_code code1,
4506 enum tree_code code2, tree decl1,
4507 tree decl2, int op_type)
4508{
4509 int i;
4510 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4511 gimple *new_stmt1, *new_stmt2;
6e1aa848 4512 vec<tree> vec_tmp = vNULL;
4a00c761 4513
9771b263
DN
4514 vec_tmp.create (vec_oprnds0->length () * 2);
4515 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4516 {
4517 if (op_type == binary_op)
9771b263 4518 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4519 else
4520 vop1 = NULL_TREE;
4521
4522 /* Generate the two halves of promotion operation. */
4523 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4524 op_type, vec_dest, gsi, stmt);
4525 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4526 op_type, vec_dest, gsi, stmt);
4527 if (is_gimple_call (new_stmt1))
4528 {
4529 new_tmp1 = gimple_call_lhs (new_stmt1);
4530 new_tmp2 = gimple_call_lhs (new_stmt2);
4531 }
4532 else
4533 {
4534 new_tmp1 = gimple_assign_lhs (new_stmt1);
4535 new_tmp2 = gimple_assign_lhs (new_stmt2);
4536 }
4537
4538 /* Store the results for the next step. */
9771b263
DN
4539 vec_tmp.quick_push (new_tmp1);
4540 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4541 }
4542
689eaba3 4543 vec_oprnds0->release ();
4a00c761
JJ
4544 *vec_oprnds0 = vec_tmp;
4545}
4546
4547
b8698a0f
L
4548/* Check if STMT performs a conversion operation, that can be vectorized.
4549 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4550 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4551 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4552
4553static bool
355fe088 4554vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
4555 gimple **vec_stmt, slp_tree slp_node,
4556 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4557{
4558 tree vec_dest;
4559 tree scalar_dest;
4a00c761 4560 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4561 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4562 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4563 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4564 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4565 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4566 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4567 tree new_temp;
ebfd146a 4568 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4569 int ndts = 2;
355fe088 4570 gimple *new_stmt = NULL;
ebfd146a 4571 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4572 poly_uint64 nunits_in;
4573 poly_uint64 nunits_out;
ebfd146a 4574 tree vectype_out, vectype_in;
4a00c761
JJ
4575 int ncopies, i, j;
4576 tree lhs_type, rhs_type;
ebfd146a 4577 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4578 vec<tree> vec_oprnds0 = vNULL;
4579 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4580 tree vop0;
4a00c761 4581 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4582 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4583 int multi_step_cvt = 0;
6e1aa848 4584 vec<tree> interm_types = vNULL;
4a00c761
JJ
4585 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4586 int op_type;
4a00c761 4587 unsigned short fltsz;
ebfd146a
IR
4588
4589 /* Is STMT a vectorizable conversion? */
4590
4a00c761 4591 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4592 return false;
4593
66c16fd9
RB
4594 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4595 && ! vec_stmt)
ebfd146a
IR
4596 return false;
4597
4598 if (!is_gimple_assign (stmt))
4599 return false;
4600
4601 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4602 return false;
4603
4604 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4605 if (!CONVERT_EXPR_CODE_P (code)
4606 && code != FIX_TRUNC_EXPR
4607 && code != FLOAT_EXPR
4608 && code != WIDEN_MULT_EXPR
4609 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4610 return false;
4611
4a00c761
JJ
4612 op_type = TREE_CODE_LENGTH (code);
4613
ebfd146a 4614 /* Check types of lhs and rhs. */
b690cc0f 4615 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4616 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4617 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4618
ebfd146a
IR
4619 op0 = gimple_assign_rhs1 (stmt);
4620 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4621
4622 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4623 && !((INTEGRAL_TYPE_P (lhs_type)
4624 && INTEGRAL_TYPE_P (rhs_type))
4625 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4626 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4627 return false;
4628
e6f5c25d
IE
4629 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4630 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4631 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4632 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4633 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4634 {
73fbfcad 4635 if (dump_enabled_p ())
78c60e3d 4636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4637 "type conversion to/from bit-precision unsupported."
4638 "\n");
4a00c761
JJ
4639 return false;
4640 }
4641
b690cc0f 4642 /* Check the operands of the operation. */
894dd753 4643 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
b690cc0f 4644 {
73fbfcad 4645 if (dump_enabled_p ())
78c60e3d 4646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4647 "use not simple.\n");
b690cc0f
RG
4648 return false;
4649 }
4a00c761
JJ
4650 if (op_type == binary_op)
4651 {
4652 bool ok;
4653
4654 op1 = gimple_assign_rhs2 (stmt);
4655 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4656 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4657 OP1. */
4658 if (CONSTANT_CLASS_P (op0))
894dd753 4659 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4a00c761 4660 else
894dd753 4661 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4a00c761
JJ
4662
4663 if (!ok)
4664 {
73fbfcad 4665 if (dump_enabled_p ())
78c60e3d 4666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4667 "use not simple.\n");
4a00c761
JJ
4668 return false;
4669 }
4670 }
4671
b690cc0f
RG
4672 /* If op0 is an external or constant defs use a vector type of
4673 the same size as the output vector type. */
ebfd146a 4674 if (!vectype_in)
b690cc0f 4675 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4676 if (vec_stmt)
4677 gcc_assert (vectype_in);
4678 if (!vectype_in)
4679 {
73fbfcad 4680 if (dump_enabled_p ())
4a00c761 4681 {
78c60e3d
SS
4682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4683 "no vectype for scalar type ");
4684 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4685 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4686 }
7d8930a0
IR
4687
4688 return false;
4689 }
ebfd146a 4690
e6f5c25d
IE
4691 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4692 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4693 {
4694 if (dump_enabled_p ())
4695 {
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "can't convert between boolean and non "
4698 "boolean vectors");
4699 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4700 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4701 }
4702
4703 return false;
4704 }
4705
b690cc0f
RG
4706 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4707 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4708 if (known_eq (nunits_out, nunits_in))
ebfd146a 4709 modifier = NONE;
062d5ccc
RS
4710 else if (multiple_p (nunits_out, nunits_in))
4711 modifier = NARROW;
ebfd146a 4712 else
062d5ccc
RS
4713 {
4714 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4715 modifier = WIDEN;
4716 }
ebfd146a 4717
ff802fa1
IR
4718 /* Multiple types in SLP are handled by creating the appropriate number of
4719 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4720 case of SLP. */
fce57248 4721 if (slp_node)
ebfd146a 4722 ncopies = 1;
4a00c761 4723 else if (modifier == NARROW)
e8f142e2 4724 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4725 else
e8f142e2 4726 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4727
ebfd146a
IR
4728 /* Sanity check: make sure that at least one copy of the vectorized stmt
4729 needs to be generated. */
4730 gcc_assert (ncopies >= 1);
4731
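 	  /* Editor's note -- illustrative values, not from the original source:
 	     with 128-bit vectors, a double -> float conversion has
 	     vectype_in = V2DF (nunits_in = 2) and vectype_out = V4SF
 	     (nunits_out = 4), so MODIFIER is NARROW and, with VF = 8,
 	     ncopies = 8 / 4 = 2 computed from vectype_out; the reverse
 	     float -> double conversion is WIDEN with ncopies = 8 / 4 = 2
 	     computed from vectype_in.  */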
16d22000
RS
4732 bool found_mode = false;
4733 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4734 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4735 opt_scalar_mode rhs_mode_iter;
b397965c 4736
ebfd146a 4737 /* Supportable by target? */
4a00c761 4738 switch (modifier)
ebfd146a 4739 {
4a00c761
JJ
4740 case NONE:
4741 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4742 return false;
4743 if (supportable_convert_operation (code, vectype_out, vectype_in,
4744 &decl1, &code1))
4745 break;
4746 /* FALLTHRU */
4747 unsupported:
73fbfcad 4748 if (dump_enabled_p ())
78c60e3d 4749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4750 "conversion not supported by target.\n");
ebfd146a 4751 return false;
ebfd146a 4752
4a00c761
JJ
4753 case WIDEN:
4754 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4755 &code1, &code2, &multi_step_cvt,
4756 &interm_types))
4a00c761
JJ
4757 {
4758 /* Binary widening operation can only be supported directly by the
4759 architecture. */
4760 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4761 break;
4762 }
4763
4764 if (code != FLOAT_EXPR
b397965c 4765 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4766 goto unsupported;
4767
b397965c 4768 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4769 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4770 {
16d22000 4771 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4772 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4773 break;
4774
4a00c761
JJ
4775 cvt_type
4776 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4777 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4778 if (cvt_type == NULL_TREE)
4779 goto unsupported;
4780
4781 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4782 {
4783 if (!supportable_convert_operation (code, vectype_out,
4784 cvt_type, &decl1, &codecvt1))
4785 goto unsupported;
4786 }
4787 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4788 cvt_type, &codecvt1,
4789 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4790 &interm_types))
4791 continue;
4792 else
4793 gcc_assert (multi_step_cvt == 0);
4794
4795 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4796 vectype_in, &code1, &code2,
4797 &multi_step_cvt, &interm_types))
16d22000
RS
4798 {
4799 found_mode = true;
4800 break;
4801 }
4a00c761
JJ
4802 }
4803
16d22000 4804 if (!found_mode)
4a00c761
JJ
4805 goto unsupported;
4806
4807 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4808 codecvt2 = ERROR_MARK;
4809 else
4810 {
4811 multi_step_cvt++;
9771b263 4812 interm_types.safe_push (cvt_type);
4a00c761
JJ
4813 cvt_type = NULL_TREE;
4814 }
4815 break;
4816
4817 case NARROW:
4818 gcc_assert (op_type == unary_op);
4819 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4820 &code1, &multi_step_cvt,
4821 &interm_types))
4822 break;
4823
4824 if (code != FIX_TRUNC_EXPR
b397965c 4825 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4826 goto unsupported;
4827
4a00c761
JJ
4828 cvt_type
4829 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4830 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4831 if (cvt_type == NULL_TREE)
4832 goto unsupported;
4833 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4834 &decl1, &codecvt1))
4835 goto unsupported;
4836 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4837 &code1, &multi_step_cvt,
4838 &interm_types))
4839 break;
4840 goto unsupported;
4841
4842 default:
4843 gcc_unreachable ();
ebfd146a
IR
4844 }
4845
4846 if (!vec_stmt) /* transformation not required. */
4847 {
adac3a68 4848 DUMP_VECT_SCOPE ("vectorizable_conversion");
4a00c761 4849 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4850 {
4851 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4852 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4853 cost_vec);
8bd37302 4854 }
4a00c761
JJ
4855 else if (modifier == NARROW)
4856 {
4857 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4858 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4859 cost_vec);
4a00c761
JJ
4860 }
4861 else
4862 {
4863 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4864 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4865 cost_vec);
4a00c761 4866 }
9771b263 4867 interm_types.release ();
ebfd146a
IR
4868 return true;
4869 }
4870
67b8dbac 4871 /* Transform. */
73fbfcad 4872 if (dump_enabled_p ())
78c60e3d 4873 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4874 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4875
4a00c761
JJ
4876 if (op_type == binary_op)
4877 {
4878 if (CONSTANT_CLASS_P (op0))
4879 op0 = fold_convert (TREE_TYPE (op1), op0);
4880 else if (CONSTANT_CLASS_P (op1))
4881 op1 = fold_convert (TREE_TYPE (op0), op1);
4882 }
4883
4884 /* In case of multi-step conversion, we first generate conversion operations
 4885 	 to the intermediate types, and then from those types to the final one.
4886 We create vector destinations for the intermediate type (TYPES) received
4887 from supportable_*_operation, and store them in the correct order
4888 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4889 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4890 vec_dest = vect_create_destination_var (scalar_dest,
4891 (cvt_type && modifier == WIDEN)
4892 ? cvt_type : vectype_out);
9771b263 4893 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4894
4895 if (multi_step_cvt)
4896 {
9771b263
DN
4897 for (i = interm_types.length () - 1;
4898 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4899 {
4900 vec_dest = vect_create_destination_var (scalar_dest,
4901 intermediate_type);
9771b263 4902 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4903 }
4904 }
ebfd146a 4905
4a00c761 4906 if (cvt_type)
82294ec1
JJ
4907 vec_dest = vect_create_destination_var (scalar_dest,
4908 modifier == WIDEN
4909 ? vectype_out : cvt_type);
4a00c761
JJ
4910
4911 if (!slp_node)
4912 {
30862efc 4913 if (modifier == WIDEN)
4a00c761 4914 {
c3284718 4915 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4916 if (op_type == binary_op)
9771b263 4917 vec_oprnds1.create (1);
4a00c761 4918 }
30862efc 4919 else if (modifier == NARROW)
9771b263
DN
4920 vec_oprnds0.create (
4921 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4922 }
4923 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4924 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4925
4a00c761 4926 last_oprnd = op0;
ebfd146a
IR
4927 prev_stmt_info = NULL;
4928 switch (modifier)
4929 {
4930 case NONE:
4931 for (j = 0; j < ncopies; j++)
4932 {
ebfd146a 4933 if (j == 0)
306b0c92 4934 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4935 else
4936 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4937
9771b263 4938 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4939 {
4940 /* Arguments are ready, create the new vector stmt. */
4941 if (code1 == CALL_EXPR)
4942 {
4943 new_stmt = gimple_build_call (decl1, 1, vop0);
4944 new_temp = make_ssa_name (vec_dest, new_stmt);
4945 gimple_call_set_lhs (new_stmt, new_temp);
4946 }
4947 else
4948 {
4949 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4950 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4951 new_temp = make_ssa_name (vec_dest, new_stmt);
4952 gimple_assign_set_lhs (new_stmt, new_temp);
4953 }
4954
4955 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4956 if (slp_node)
9771b263 4957 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4958 else
4959 {
4960 if (!prev_stmt_info)
4961 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4962 else
4963 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4964 prev_stmt_info = vinfo_for_stmt (new_stmt);
4965 }
4a00c761 4966 }
ebfd146a
IR
4967 }
4968 break;
4969
4970 case WIDEN:
4971 /* In case the vectorization factor (VF) is bigger than the number
4972 of elements that we can fit in a vectype (nunits), we have to
 4973 	 generate more than one vector stmt - i.e., we need to "unroll"
4974 the vector stmt by a factor VF/nunits. */
4975 for (j = 0; j < ncopies; j++)
4976 {
4a00c761 4977 /* Handle uses. */
ebfd146a 4978 if (j == 0)
4a00c761
JJ
4979 {
4980 if (slp_node)
4981 {
4982 if (code == WIDEN_LSHIFT_EXPR)
4983 {
4984 unsigned int k;
ebfd146a 4985
4a00c761
JJ
4986 vec_oprnd1 = op1;
4987 /* Store vec_oprnd1 for every vector stmt to be created
4988 for SLP_NODE. We check during the analysis that all
4989 the shift arguments are the same. */
4990 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4991 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4992
4993 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4994 slp_node);
4a00c761
JJ
4995 }
4996 else
4997 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4998 &vec_oprnds1, slp_node);
4a00c761
JJ
4999 }
5000 else
5001 {
81c40241 5002 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 5003 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5004 if (op_type == binary_op)
5005 {
5006 if (code == WIDEN_LSHIFT_EXPR)
5007 vec_oprnd1 = op1;
5008 else
81c40241 5009 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 5010 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5011 }
5012 }
5013 }
ebfd146a 5014 else
4a00c761
JJ
5015 {
5016 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
5017 vec_oprnds0.truncate (0);
5018 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5019 if (op_type == binary_op)
5020 {
5021 if (code == WIDEN_LSHIFT_EXPR)
5022 vec_oprnd1 = op1;
5023 else
5024 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5025 vec_oprnd1);
9771b263
DN
5026 vec_oprnds1.truncate (0);
5027 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5028 }
5029 }
ebfd146a 5030
4a00c761
JJ
5031 /* Arguments are ready. Create the new vector stmts. */
5032 for (i = multi_step_cvt; i >= 0; i--)
5033 {
9771b263 5034 tree this_dest = vec_dsts[i];
4a00c761
JJ
5035 enum tree_code c1 = code1, c2 = code2;
5036 if (i == 0 && codecvt2 != ERROR_MARK)
5037 {
5038 c1 = codecvt1;
5039 c2 = codecvt2;
5040 }
5041 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5042 &vec_oprnds1,
5043 stmt, this_dest, gsi,
5044 c1, c2, decl1, decl2,
5045 op_type);
5046 }
5047
9771b263 5048 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5049 {
5050 if (cvt_type)
5051 {
5052 if (codecvt1 == CALL_EXPR)
5053 {
5054 new_stmt = gimple_build_call (decl1, 1, vop0);
5055 new_temp = make_ssa_name (vec_dest, new_stmt);
5056 gimple_call_set_lhs (new_stmt, new_temp);
5057 }
5058 else
5059 {
5060 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5061 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5062 new_stmt = gimple_build_assign (new_temp, codecvt1,
5063 vop0);
4a00c761
JJ
5064 }
5065
5066 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5067 }
5068 else
5069 new_stmt = SSA_NAME_DEF_STMT (vop0);
5070
5071 if (slp_node)
9771b263 5072 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 5073 else
c689ce1e
RB
5074 {
5075 if (!prev_stmt_info)
5076 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5077 else
5078 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5079 prev_stmt_info = vinfo_for_stmt (new_stmt);
5080 }
4a00c761 5081 }
ebfd146a 5082 }
4a00c761
JJ
5083
5084 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5085 break;
5086
5087 case NARROW:
5088 /* In case the vectorization factor (VF) is bigger than the number
5089 of elements that we can fit in a vectype (nunits), we have to
 5090 	 generate more than one vector stmt - i.e., we need to "unroll"
5091 the vector stmt by a factor VF/nunits. */
5092 for (j = 0; j < ncopies; j++)
5093 {
5094 /* Handle uses. */
4a00c761
JJ
5095 if (slp_node)
5096 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5097 slp_node);
ebfd146a
IR
5098 else
5099 {
9771b263 5100 vec_oprnds0.truncate (0);
4a00c761
JJ
5101 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5102 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5103 }
5104
4a00c761
JJ
5105 /* Arguments are ready. Create the new vector stmts. */
5106 if (cvt_type)
9771b263 5107 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5108 {
5109 if (codecvt1 == CALL_EXPR)
5110 {
5111 new_stmt = gimple_build_call (decl1, 1, vop0);
5112 new_temp = make_ssa_name (vec_dest, new_stmt);
5113 gimple_call_set_lhs (new_stmt, new_temp);
5114 }
5115 else
5116 {
5117 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5118 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5119 new_stmt = gimple_build_assign (new_temp, codecvt1,
5120 vop0);
4a00c761 5121 }
ebfd146a 5122
4a00c761 5123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 5124 vec_oprnds0[i] = new_temp;
4a00c761 5125 }
ebfd146a 5126
4a00c761
JJ
5127 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5128 stmt, vec_dsts, gsi,
5129 slp_node, code1,
5130 &prev_stmt_info);
ebfd146a
IR
5131 }
5132
5133 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5134 break;
ebfd146a
IR
5135 }
5136
9771b263
DN
5137 vec_oprnds0.release ();
5138 vec_oprnds1.release ();
9771b263 5139 interm_types.release ();
ebfd146a
IR
5140
5141 return true;
5142}
ff802fa1
IR
5143
5144
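/* Editor's illustration (not part of tree-vect-stmts.c): the WIDEN case above
   may need a two-step promotion through an intermediate integer type
   (CVT_TYPE).  Assuming a target that provides the relevant widening
   operations, the scalar loop below converts short to double, which the
   FOR_EACH_2XWIDER_MODE search resolves as short -> int (NOP widening)
   followed by int -> double (FLOAT_EXPR).  */

void
widen_short_to_double (double *restrict d, short *restrict s, int n)
{
  for (int i = 0; i < n; i++)
    d[i] = s[i];
}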
ebfd146a
IR
5145/* Function vectorizable_assignment.
5146
b8698a0f
L
5147 Check if STMT performs an assignment (copy) that can be vectorized.
5148 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5149 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5150 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5151
5152static bool
355fe088 5153vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5154 gimple **vec_stmt, slp_tree slp_node,
5155 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5156{
5157 tree vec_dest;
5158 tree scalar_dest;
5159 tree op;
5160 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5161 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5162 tree new_temp;
4fc5ebf1
JG
5163 enum vect_def_type dt[1] = {vect_unknown_def_type};
5164 int ndts = 1;
ebfd146a 5165 int ncopies;
f18b55bd 5166 int i, j;
6e1aa848 5167 vec<tree> vec_oprnds = vNULL;
ebfd146a 5168 tree vop;
a70d6342 5169 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5170 vec_info *vinfo = stmt_info->vinfo;
355fe088 5171 gimple *new_stmt = NULL;
f18b55bd 5172 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5173 enum tree_code code;
5174 tree vectype_in;
ebfd146a 5175
a70d6342 5176 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5177 return false;
5178
66c16fd9
RB
5179 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5180 && ! vec_stmt)
ebfd146a
IR
5181 return false;
5182
5183 /* Is vectorizable assignment? */
5184 if (!is_gimple_assign (stmt))
5185 return false;
5186
5187 scalar_dest = gimple_assign_lhs (stmt);
5188 if (TREE_CODE (scalar_dest) != SSA_NAME)
5189 return false;
5190
fde9c428 5191 code = gimple_assign_rhs_code (stmt);
ebfd146a 5192 if (gimple_assign_single_p (stmt)
fde9c428
RG
5193 || code == PAREN_EXPR
5194 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5195 op = gimple_assign_rhs1 (stmt);
5196 else
5197 return false;
5198
7b7ec6c5
RG
5199 if (code == VIEW_CONVERT_EXPR)
5200 op = TREE_OPERAND (op, 0);
5201
465c8c19 5202 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5203 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5204
5205 /* Multiple types in SLP are handled by creating the appropriate number of
5206 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5207 case of SLP. */
fce57248 5208 if (slp_node)
465c8c19
JJ
5209 ncopies = 1;
5210 else
e8f142e2 5211 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5212
5213 gcc_assert (ncopies >= 1);
5214
894dd753 5215 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
ebfd146a 5216 {
73fbfcad 5217 if (dump_enabled_p ())
78c60e3d 5218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5219 "use not simple.\n");
ebfd146a
IR
5220 return false;
5221 }
5222
fde9c428
RG
5223 /* We can handle NOP_EXPR conversions that do not change the number
5224 of elements or the vector size. */
7b7ec6c5
RG
5225 if ((CONVERT_EXPR_CODE_P (code)
5226 || code == VIEW_CONVERT_EXPR)
fde9c428 5227 && (!vectype_in
928686b1 5228 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5229 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5230 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5231 return false;
5232
7b7b1813
RG
5233 /* We do not handle bit-precision changes. */
5234 if ((CONVERT_EXPR_CODE_P (code)
5235 || code == VIEW_CONVERT_EXPR)
5236 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5237 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5238 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5239 /* But a conversion that does not change the bit-pattern is ok. */
5240 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5241 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5242 && TYPE_UNSIGNED (TREE_TYPE (op)))
5243 /* Conversion between boolean types of different sizes is
 5244 	 a simple assignment in case their vectypes are the same
5245 boolean vectors. */
5246 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5247 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5248 {
73fbfcad 5249 if (dump_enabled_p ())
78c60e3d
SS
5250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5251 "type conversion to/from bit-precision "
e645e942 5252 "unsupported.\n");
7b7b1813
RG
5253 return false;
5254 }
5255
ebfd146a
IR
5256 if (!vec_stmt) /* transformation not required. */
5257 {
5258 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
adac3a68 5259 DUMP_VECT_SCOPE ("vectorizable_assignment");
68435eb2 5260 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5261 return true;
5262 }
5263
67b8dbac 5264 /* Transform. */
73fbfcad 5265 if (dump_enabled_p ())
e645e942 5266 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5267
5268 /* Handle def. */
5269 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5270
5271 /* Handle use. */
f18b55bd 5272 for (j = 0; j < ncopies; j++)
ebfd146a 5273 {
f18b55bd
IR
5274 /* Handle uses. */
5275 if (j == 0)
306b0c92 5276 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5277 else
5278 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5279
 5280 	      /* Arguments are ready.  Create the new vector stmt.  */
9771b263 5281 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5282 {
7b7ec6c5
RG
5283 if (CONVERT_EXPR_CODE_P (code)
5284 || code == VIEW_CONVERT_EXPR)
4a73490d 5285 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
5286 new_stmt = gimple_build_assign (vec_dest, vop);
5287 new_temp = make_ssa_name (vec_dest, new_stmt);
5288 gimple_assign_set_lhs (new_stmt, new_temp);
5289 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5290 if (slp_node)
9771b263 5291 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 5292 }
ebfd146a
IR
5293
5294 if (slp_node)
f18b55bd
IR
5295 continue;
5296
5297 if (j == 0)
5298 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5299 else
5300 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5301
5302 prev_stmt_info = vinfo_for_stmt (new_stmt);
5303 }
b8698a0f 5304
9771b263 5305 vec_oprnds.release ();
ebfd146a
IR
5306 return true;
5307}
5308
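/* Editor's illustration (not part of tree-vect-stmts.c): a conversion that
   vectorizable_assignment accepts, because it changes neither the number of
   vector elements nor the vector size.  Each vector statement is then just a
   copy of the loaded vector through a VIEW_CONVERT_EXPR.  */

void
copy_as_unsigned (unsigned int *restrict dst, int *restrict src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (unsigned int) src[i];
}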
9dc3f7de 5309
1107f3ae
IR
5310/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5311 either as shift by a scalar or by a vector. */
5312
5313bool
5314vect_supportable_shift (enum tree_code code, tree scalar_type)
5315{
5316
ef4bddc2 5317 machine_mode vec_mode;
1107f3ae
IR
5318 optab optab;
5319 int icode;
5320 tree vectype;
5321
5322 vectype = get_vectype_for_scalar_type (scalar_type);
5323 if (!vectype)
5324 return false;
5325
5326 optab = optab_for_tree_code (code, vectype, optab_scalar);
5327 if (!optab
5328 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5329 {
5330 optab = optab_for_tree_code (code, vectype, optab_vector);
5331 if (!optab
5332 || (optab_handler (optab, TYPE_MODE (vectype))
5333 == CODE_FOR_nothing))
5334 return false;
5335 }
5336
5337 vec_mode = TYPE_MODE (vectype);
5338 icode = (int) optab_handler (optab, vec_mode);
5339 if (icode == CODE_FOR_nothing)
5340 return false;
5341
5342 return true;
5343}
5344
5345
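/* Editor's illustration (not part of tree-vect-stmts.c): the two optabs
   probed above correspond to the two source forms below.  A loop-invariant
   shift amount can use the vector-shifted-by-scalar patterns (optab_scalar);
   a per-element amount needs the vector-shifted-by-vector patterns
   (optab_vector).  The function names are made up for the example.  */

void
shift_by_scalar (int *restrict a, int n, int k)
{
  for (int i = 0; i < n; i++)
    a[i] <<= k;		/* invariant amount */
}

void
shift_by_vector (int *restrict a, int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= b[i];	/* per-element amount */
}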
9dc3f7de
IR
5346/* Function vectorizable_shift.
5347
5348 Check if STMT performs a shift operation that can be vectorized.
5349 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5350 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5351 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5352
5353static bool
355fe088 5354vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5355 gimple **vec_stmt, slp_tree slp_node,
5356 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5357{
5358 tree vec_dest;
5359 tree scalar_dest;
5360 tree op0, op1 = NULL;
5361 tree vec_oprnd1 = NULL_TREE;
5362 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5363 tree vectype;
5364 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5365 enum tree_code code;
ef4bddc2 5366 machine_mode vec_mode;
9dc3f7de
IR
5367 tree new_temp;
5368 optab optab;
5369 int icode;
ef4bddc2 5370 machine_mode optab_op2_mode;
9dc3f7de 5371 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5372 int ndts = 2;
355fe088 5373 gimple *new_stmt = NULL;
9dc3f7de 5374 stmt_vec_info prev_stmt_info;
928686b1
RS
5375 poly_uint64 nunits_in;
5376 poly_uint64 nunits_out;
9dc3f7de 5377 tree vectype_out;
cede2577 5378 tree op1_vectype;
9dc3f7de
IR
5379 int ncopies;
5380 int j, i;
6e1aa848
DN
5381 vec<tree> vec_oprnds0 = vNULL;
5382 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5383 tree vop0, vop1;
5384 unsigned int k;
49eab32e 5385 bool scalar_shift_arg = true;
9dc3f7de 5386 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5387 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5388
5389 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5390 return false;
5391
66c16fd9
RB
5392 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5393 && ! vec_stmt)
9dc3f7de
IR
5394 return false;
5395
5396 /* Is STMT a vectorizable binary/unary operation? */
5397 if (!is_gimple_assign (stmt))
5398 return false;
5399
5400 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5401 return false;
5402
5403 code = gimple_assign_rhs_code (stmt);
5404
5405 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5406 || code == RROTATE_EXPR))
5407 return false;
5408
5409 scalar_dest = gimple_assign_lhs (stmt);
5410 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5411 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5412 {
73fbfcad 5413 if (dump_enabled_p ())
78c60e3d 5414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5415 "bit-precision shifts not supported.\n");
7b7b1813
RG
5416 return false;
5417 }
9dc3f7de
IR
5418
5419 op0 = gimple_assign_rhs1 (stmt);
894dd753 5420 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
9dc3f7de 5421 {
73fbfcad 5422 if (dump_enabled_p ())
78c60e3d 5423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5424 "use not simple.\n");
9dc3f7de
IR
5425 return false;
5426 }
5427 /* If op0 is an external or constant def use a vector type with
5428 the same size as the output vector type. */
5429 if (!vectype)
5430 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5431 if (vec_stmt)
5432 gcc_assert (vectype);
5433 if (!vectype)
5434 {
73fbfcad 5435 if (dump_enabled_p ())
78c60e3d 5436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5437 "no vectype for scalar type\n");
9dc3f7de
IR
5438 return false;
5439 }
5440
5441 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5442 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5443 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5444 return false;
5445
5446 op1 = gimple_assign_rhs2 (stmt);
894dd753 5447 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype))
9dc3f7de 5448 {
73fbfcad 5449 if (dump_enabled_p ())
78c60e3d 5450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5451 "use not simple.\n");
9dc3f7de
IR
5452 return false;
5453 }
5454
9dc3f7de
IR
5455 /* Multiple types in SLP are handled by creating the appropriate number of
5456 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5457 case of SLP. */
fce57248 5458 if (slp_node)
9dc3f7de
IR
5459 ncopies = 1;
5460 else
e8f142e2 5461 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5462
5463 gcc_assert (ncopies >= 1);
5464
5465 /* Determine whether the shift amount is a vector, or scalar. If the
5466 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5467
dbfa87aa
YR
5468 if ((dt[1] == vect_internal_def
5469 || dt[1] == vect_induction_def)
5470 && !slp_node)
49eab32e
JJ
5471 scalar_shift_arg = false;
5472 else if (dt[1] == vect_constant_def
5473 || dt[1] == vect_external_def
5474 || dt[1] == vect_internal_def)
5475 {
 5476 	      /* In SLP, we need to check whether the shift count is the same;
 5477 		 in loops, if it is a constant or invariant, it is always
 5478 		 a scalar shift.  */
5479 if (slp_node)
5480 {
355fe088
TS
5481 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5482 gimple *slpstmt;
49eab32e 5483
9771b263 5484 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5485 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5486 scalar_shift_arg = false;
5487 }
60d393e8
RB
5488
5489 /* If the shift amount is computed by a pattern stmt we cannot
5490 use the scalar amount directly thus give up and use a vector
5491 shift. */
5492 if (dt[1] == vect_internal_def)
5493 {
5494 gimple *def = SSA_NAME_DEF_STMT (op1);
5495 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5496 scalar_shift_arg = false;
5497 }
49eab32e
JJ
5498 }
5499 else
5500 {
73fbfcad 5501 if (dump_enabled_p ())
78c60e3d 5502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5503 "operand mode requires invariant argument.\n");
49eab32e
JJ
5504 return false;
5505 }
5506
9dc3f7de 5507 /* Vector shifted by vector. */
49eab32e 5508 if (!scalar_shift_arg)
9dc3f7de
IR
5509 {
5510 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5511 if (dump_enabled_p ())
78c60e3d 5512 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5513 "vector/vector shift/rotate found.\n");
78c60e3d 5514
aa948027
JJ
5515 if (!op1_vectype)
5516 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5517 if (op1_vectype == NULL_TREE
5518 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5519 {
73fbfcad 5520 if (dump_enabled_p ())
78c60e3d
SS
5521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5522 "unusable type for last operand in"
e645e942 5523 " vector/vector shift/rotate.\n");
cede2577
JJ
5524 return false;
5525 }
9dc3f7de
IR
5526 }
5527 /* See if the machine has a vector shifted by scalar insn and if not
5528 then see if it has a vector shifted by vector insn. */
49eab32e 5529 else
9dc3f7de
IR
5530 {
5531 optab = optab_for_tree_code (code, vectype, optab_scalar);
5532 if (optab
5533 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5534 {
73fbfcad 5535 if (dump_enabled_p ())
78c60e3d 5536 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5537 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5538 }
5539 else
5540 {
5541 optab = optab_for_tree_code (code, vectype, optab_vector);
5542 if (optab
5543 && (optab_handler (optab, TYPE_MODE (vectype))
5544 != CODE_FOR_nothing))
5545 {
49eab32e
JJ
5546 scalar_shift_arg = false;
5547
73fbfcad 5548 if (dump_enabled_p ())
78c60e3d 5549 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5550 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5551
5552 /* Unlike the other binary operators, shifts/rotates have
5553 the rhs being int, instead of the same type as the lhs,
5554 so make sure the scalar is the right type if we are
aa948027 5555 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5556 if (dt[1] == vect_constant_def)
5557 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5558 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5559 TREE_TYPE (op1)))
5560 {
5561 if (slp_node
5562 && TYPE_MODE (TREE_TYPE (vectype))
5563 != TYPE_MODE (TREE_TYPE (op1)))
5564 {
73fbfcad 5565 if (dump_enabled_p ())
78c60e3d
SS
5566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5567 "unusable type for last operand in"
e645e942 5568 " vector/vector shift/rotate.\n");
21c0a521 5569 return false;
aa948027
JJ
5570 }
5571 if (vec_stmt && !slp_node)
5572 {
5573 op1 = fold_convert (TREE_TYPE (vectype), op1);
5574 op1 = vect_init_vector (stmt, op1,
5575 TREE_TYPE (vectype), NULL);
5576 }
5577 }
9dc3f7de
IR
5578 }
5579 }
5580 }
9dc3f7de
IR
5581
5582 /* Supportable by target? */
5583 if (!optab)
5584 {
73fbfcad 5585 if (dump_enabled_p ())
78c60e3d 5586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5587 "no optab.\n");
9dc3f7de
IR
5588 return false;
5589 }
5590 vec_mode = TYPE_MODE (vectype);
5591 icode = (int) optab_handler (optab, vec_mode);
5592 if (icode == CODE_FOR_nothing)
5593 {
73fbfcad 5594 if (dump_enabled_p ())
78c60e3d 5595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5596 "op not supported by target.\n");
9dc3f7de 5597 /* Check only during analysis. */
cf098191 5598 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5599 || (!vec_stmt
5600 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5601 return false;
73fbfcad 5602 if (dump_enabled_p ())
e645e942
TJ
5603 dump_printf_loc (MSG_NOTE, vect_location,
5604 "proceeding using word mode.\n");
9dc3f7de
IR
5605 }
5606
5607 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5608 if (!vec_stmt
5609 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5610 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5611 {
73fbfcad 5612 if (dump_enabled_p ())
78c60e3d 5613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5614 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5615 return false;
5616 }
5617
5618 if (!vec_stmt) /* transformation not required. */
5619 {
5620 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
adac3a68 5621 DUMP_VECT_SCOPE ("vectorizable_shift");
68435eb2 5622 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5623 return true;
5624 }
5625
67b8dbac 5626 /* Transform. */
9dc3f7de 5627
73fbfcad 5628 if (dump_enabled_p ())
78c60e3d 5629 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5630 "transform binary/unary operation.\n");
9dc3f7de
IR
5631
5632 /* Handle def. */
5633 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5634
9dc3f7de
IR
5635 prev_stmt_info = NULL;
5636 for (j = 0; j < ncopies; j++)
5637 {
5638 /* Handle uses. */
5639 if (j == 0)
5640 {
5641 if (scalar_shift_arg)
5642 {
5643 /* Vector shl and shr insn patterns can be defined with scalar
5644 operand 2 (shift operand). In this case, use constant or loop
5645 invariant op1 directly, without extending it to vector mode
5646 first. */
5647 optab_op2_mode = insn_data[icode].operand[2].mode;
5648 if (!VECTOR_MODE_P (optab_op2_mode))
5649 {
73fbfcad 5650 if (dump_enabled_p ())
78c60e3d 5651 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5652 "operand 1 using scalar mode.\n");
9dc3f7de 5653 vec_oprnd1 = op1;
8930f723 5654 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5655 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5656 if (slp_node)
5657 {
5658 /* Store vec_oprnd1 for every vector stmt to be created
5659 for SLP_NODE. We check during the analysis that all
5660 the shift arguments are the same.
5661 TODO: Allow different constants for different vector
5662 stmts generated for an SLP instance. */
5663 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5664 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5665 }
5666 }
5667 }
5668
5669 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
 5670 	     (a special case for certain kinds of vector shifts); otherwise,
5671 operand 1 should be of a vector type (the usual case). */
5672 if (vec_oprnd1)
5673 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5674 slp_node);
9dc3f7de
IR
5675 else
5676 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5677 slp_node);
9dc3f7de
IR
5678 }
5679 else
5680 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5681
5682 /* Arguments are ready. Create the new vector stmt. */
9771b263 5683 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5684 {
9771b263 5685 vop1 = vec_oprnds1[i];
0d0e4a03 5686 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5687 new_temp = make_ssa_name (vec_dest, new_stmt);
5688 gimple_assign_set_lhs (new_stmt, new_temp);
5689 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5690 if (slp_node)
9771b263 5691 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5692 }
5693
5694 if (slp_node)
5695 continue;
5696
5697 if (j == 0)
5698 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5699 else
5700 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5701 prev_stmt_info = vinfo_for_stmt (new_stmt);
5702 }
5703
9771b263
DN
5704 vec_oprnds0.release ();
5705 vec_oprnds1.release ();
9dc3f7de
IR
5706
5707 return true;
5708}
5709
5710
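/* Editor's illustration (not part of tree-vect-stmts.c): at the source level
   a shift amount is an int even when the shifted elements are wider, so when
   the vector/vector form is used for the loop below the invariant amount K is
   first fold_convert'ed to long long (and, outside SLP, broadcast with
   vect_init_vector), as handled in vectorizable_shift above.  */

void
shift_wide_elements (long long *restrict a, int n, int k)
{
  for (int i = 0; i < n; i++)
    a[i] >>= k;
}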
ebfd146a
IR
5711/* Function vectorizable_operation.
5712
16949072
RG
5713 Check if STMT performs a binary, unary or ternary operation that can
5714 be vectorized.
b8698a0f 5715 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5716 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5717 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5718
5719static bool
355fe088 5720vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5721 gimple **vec_stmt, slp_tree slp_node,
5722 stmt_vector_for_cost *cost_vec)
ebfd146a 5723{
00f07b86 5724 tree vec_dest;
ebfd146a 5725 tree scalar_dest;
16949072 5726 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5727 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5728 tree vectype;
ebfd146a 5729 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5730 enum tree_code code, orig_code;
ef4bddc2 5731 machine_mode vec_mode;
ebfd146a
IR
5732 tree new_temp;
5733 int op_type;
00f07b86 5734 optab optab;
523ba738 5735 bool target_support_p;
16949072
RG
5736 enum vect_def_type dt[3]
5737 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5738 int ndts = 3;
355fe088 5739 gimple *new_stmt = NULL;
ebfd146a 5740 stmt_vec_info prev_stmt_info;
928686b1
RS
5741 poly_uint64 nunits_in;
5742 poly_uint64 nunits_out;
ebfd146a
IR
5743 tree vectype_out;
5744 int ncopies;
5745 int j, i;
6e1aa848
DN
5746 vec<tree> vec_oprnds0 = vNULL;
5747 vec<tree> vec_oprnds1 = vNULL;
5748 vec<tree> vec_oprnds2 = vNULL;
16949072 5749 tree vop0, vop1, vop2;
a70d6342 5750 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5751 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5752
a70d6342 5753 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5754 return false;
5755
66c16fd9
RB
5756 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5757 && ! vec_stmt)
ebfd146a
IR
5758 return false;
5759
5760 /* Is STMT a vectorizable binary/unary operation? */
5761 if (!is_gimple_assign (stmt))
5762 return false;
5763
5764 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5765 return false;
5766
0eb952ea 5767 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5768
1af4ebf5
MG
5769 /* For pointer addition and subtraction, we should use the normal
5770 plus and minus for the vector operation. */
ebfd146a
IR
5771 if (code == POINTER_PLUS_EXPR)
5772 code = PLUS_EXPR;
1af4ebf5
MG
5773 if (code == POINTER_DIFF_EXPR)
5774 code = MINUS_EXPR;
ebfd146a
IR
5775
5776 /* Support only unary or binary operations. */
5777 op_type = TREE_CODE_LENGTH (code);
16949072 5778 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5779 {
73fbfcad 5780 if (dump_enabled_p ())
78c60e3d 5781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5782 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5783 op_type);
ebfd146a
IR
5784 return false;
5785 }
5786
b690cc0f
RG
5787 scalar_dest = gimple_assign_lhs (stmt);
5788 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5789
7b7b1813
RG
5790 /* Most operations cannot handle bit-precision types without extra
5791 truncations. */
045c1278 5792 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5793 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
 5794 	 /* Exceptions are bitwise binary operations.  */
5795 && code != BIT_IOR_EXPR
5796 && code != BIT_XOR_EXPR
5797 && code != BIT_AND_EXPR)
5798 {
73fbfcad 5799 if (dump_enabled_p ())
78c60e3d 5800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5801 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5802 return false;
5803 }
5804
ebfd146a 5805 op0 = gimple_assign_rhs1 (stmt);
894dd753 5806 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
ebfd146a 5807 {
73fbfcad 5808 if (dump_enabled_p ())
78c60e3d 5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5810 "use not simple.\n");
ebfd146a
IR
5811 return false;
5812 }
b690cc0f
RG
5813 /* If op0 is an external or constant def use a vector type with
5814 the same size as the output vector type. */
5815 if (!vectype)
b036c6c5
IE
5816 {
5817 /* For boolean type we cannot determine vectype by
5818 invariant value (don't know whether it is a vector
5819 of booleans or vector of integers). We use output
5820 vectype because operations on boolean don't change
5821 type. */
2568d8a1 5822 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5823 {
2568d8a1 5824 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5825 {
5826 if (dump_enabled_p ())
5827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5828 "not supported operation on bool value.\n");
5829 return false;
5830 }
5831 vectype = vectype_out;
5832 }
5833 else
5834 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5835 }
7d8930a0
IR
5836 if (vec_stmt)
5837 gcc_assert (vectype);
5838 if (!vectype)
5839 {
73fbfcad 5840 if (dump_enabled_p ())
7d8930a0 5841 {
78c60e3d
SS
5842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5843 "no vectype for scalar type ");
5844 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5845 TREE_TYPE (op0));
e645e942 5846 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5847 }
5848
5849 return false;
5850 }
b690cc0f
RG
5851
5852 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5853 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5854 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5855 return false;
ebfd146a 5856
16949072 5857 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5858 {
5859 op1 = gimple_assign_rhs2 (stmt);
894dd753 5860 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
ebfd146a 5861 {
73fbfcad 5862 if (dump_enabled_p ())
78c60e3d 5863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5864 "use not simple.\n");
ebfd146a
IR
5865 return false;
5866 }
5867 }
16949072
RG
5868 if (op_type == ternary_op)
5869 {
5870 op2 = gimple_assign_rhs3 (stmt);
894dd753 5871 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
16949072 5872 {
73fbfcad 5873 if (dump_enabled_p ())
78c60e3d 5874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5875 "use not simple.\n");
16949072
RG
5876 return false;
5877 }
5878 }
ebfd146a 5879
b690cc0f 5880 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5881 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5882 case of SLP. */
fce57248 5883 if (slp_node)
b690cc0f
RG
5884 ncopies = 1;
5885 else
e8f142e2 5886 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5887
5888 gcc_assert (ncopies >= 1);
5889
9dc3f7de 5890 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5891 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5892 || code == RROTATE_EXPR)
9dc3f7de 5893 return false;
ebfd146a 5894
ebfd146a 5895 /* Supportable by target? */
00f07b86
RH
5896
5897 vec_mode = TYPE_MODE (vectype);
5898 if (code == MULT_HIGHPART_EXPR)
523ba738 5899 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5900 else
5901 {
5902 optab = optab_for_tree_code (code, vectype, optab_default);
5903 if (!optab)
5deb57cb 5904 {
73fbfcad 5905 if (dump_enabled_p ())
78c60e3d 5906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5907 "no optab.\n");
00f07b86 5908 return false;
5deb57cb 5909 }
523ba738
RS
5910 target_support_p = (optab_handler (optab, vec_mode)
5911 != CODE_FOR_nothing);
5deb57cb
JJ
5912 }
5913
523ba738 5914 if (!target_support_p)
ebfd146a 5915 {
73fbfcad 5916 if (dump_enabled_p ())
78c60e3d 5917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5918 "op not supported by target.\n");
ebfd146a 5919 /* Check only during analysis. */
cf098191 5920 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5921 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5922 return false;
73fbfcad 5923 if (dump_enabled_p ())
e645e942
TJ
5924 dump_printf_loc (MSG_NOTE, vect_location,
5925 "proceeding using word mode.\n");
383d9c83
IR
5926 }
5927
4a00c761 5928 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5929 if (!VECTOR_MODE_P (vec_mode)
5930 && !vec_stmt
ca09abcb 5931 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5932 {
73fbfcad 5933 if (dump_enabled_p ())
78c60e3d 5934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5935 "not worthwhile without SIMD support.\n");
e34842c6 5936 return false;
7d8930a0 5937 }
ebfd146a 5938
ebfd146a
IR
5939 if (!vec_stmt) /* transformation not required. */
5940 {
4a00c761 5941 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
adac3a68 5942 DUMP_VECT_SCOPE ("vectorizable_operation");
68435eb2 5943 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5944 return true;
5945 }
5946
67b8dbac 5947 /* Transform. */
ebfd146a 5948
73fbfcad 5949 if (dump_enabled_p ())
78c60e3d 5950 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5951 "transform binary/unary operation.\n");
383d9c83 5952
0eb952ea
JJ
5953 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5954 vectors with unsigned elements, but the result is signed. So, we
 5955 	 need to compute the MINUS_EXPR into a vectype temporary and
5956 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5957 tree vec_cvt_dest = NULL_TREE;
5958 if (orig_code == POINTER_DIFF_EXPR)
7b76867b
RB
5959 {
5960 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5961 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5962 }
5963 /* Handle def. */
5964 else
5965 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
0eb952ea 5966
ebfd146a
IR
5967 /* In case the vectorization factor (VF) is bigger than the number
5968 of elements that we can fit in a vectype (nunits), we have to generate
 5969 	 more than one vector stmt - i.e., we need to "unroll" the
4a00c761
JJ
5970 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5971 from one copy of the vector stmt to the next, in the field
5972 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5973 stages to find the correct vector defs to be used when vectorizing
5974 stmts that use the defs of the current stmt. The example below
5975 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5976 we need to create 4 vectorized stmts):
5977
5978 before vectorization:
5979 RELATED_STMT VEC_STMT
5980 S1: x = memref - -
5981 S2: z = x + 1 - -
5982
5983 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5984 there):
5985 RELATED_STMT VEC_STMT
5986 VS1_0: vx0 = memref0 VS1_1 -
5987 VS1_1: vx1 = memref1 VS1_2 -
5988 VS1_2: vx2 = memref2 VS1_3 -
5989 VS1_3: vx3 = memref3 - -
5990 S1: x = load - VS1_0
5991 S2: z = x + 1 - -
5992
5993 step2: vectorize stmt S2 (done here):
5994 To vectorize stmt S2 we first need to find the relevant vector
5995 def for the first operand 'x'. This is, as usual, obtained from
5996 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5997 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5998 relevant vector def 'vx0'. Having found 'vx0' we can generate
5999 the vector stmt VS2_0, and as usual, record it in the
6000 STMT_VINFO_VEC_STMT of stmt S2.
6001 When creating the second copy (VS2_1), we obtain the relevant vector
6002 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6003 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6004 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6005 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6006 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6007 chain of stmts and pointers:
6008 RELATED_STMT VEC_STMT
6009 VS1_0: vx0 = memref0 VS1_1 -
6010 VS1_1: vx1 = memref1 VS1_2 -
6011 VS1_2: vx2 = memref2 VS1_3 -
6012 VS1_3: vx3 = memref3 - -
6013 S1: x = load - VS1_0
6014 VS2_0: vz0 = vx0 + v1 VS2_1 -
6015 VS2_1: vz1 = vx1 + v1 VS2_2 -
6016 VS2_2: vz2 = vx2 + v1 VS2_3 -
6017 VS2_3: vz3 = vx3 + v1 - -
6018 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6019
6020 prev_stmt_info = NULL;
6021 for (j = 0; j < ncopies; j++)
6022 {
6023 /* Handle uses. */
6024 if (j == 0)
4a00c761 6025 {
d6476f90 6026 if (op_type == binary_op)
4a00c761 6027 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 6028 slp_node);
d6476f90
RB
6029 else if (op_type == ternary_op)
6030 {
6031 if (slp_node)
6032 {
6033 auto_vec<tree> ops(3);
6034 ops.quick_push (op0);
6035 ops.quick_push (op1);
6036 ops.quick_push (op2);
6037 auto_vec<vec<tree> > vec_defs(3);
6038 vect_get_slp_defs (ops, slp_node, &vec_defs);
6039 vec_oprnds0 = vec_defs[0];
6040 vec_oprnds1 = vec_defs[1];
6041 vec_oprnds2 = vec_defs[2];
6042 }
6043 else
6044 {
6045 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6046 NULL);
6047 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6048 NULL);
6049 }
6050 }
4a00c761
JJ
6051 else
6052 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 6053 slp_node);
4a00c761 6054 }
ebfd146a 6055 else
4a00c761
JJ
6056 {
6057 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6058 if (op_type == ternary_op)
6059 {
9771b263
DN
6060 tree vec_oprnd = vec_oprnds2.pop ();
6061 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6062 vec_oprnd));
4a00c761
JJ
6063 }
6064 }
6065
6066 /* Arguments are ready. Create the new vector stmt. */
9771b263 6067 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6068 {
4a00c761 6069 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6070 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6071 vop2 = ((op_type == ternary_op)
9771b263 6072 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 6073 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
6074 new_temp = make_ssa_name (vec_dest, new_stmt);
6075 gimple_assign_set_lhs (new_stmt, new_temp);
6076 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
6077 if (vec_cvt_dest)
6078 {
6079 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6080 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6081 new_temp);
6082 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6083 gimple_assign_set_lhs (new_stmt, new_temp);
6084 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6085 }
4a00c761 6086 if (slp_node)
9771b263 6087 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
6088 }
6089
4a00c761
JJ
6090 if (slp_node)
6091 continue;
6092
6093 if (j == 0)
6094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6095 else
6096 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6097 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
6098 }
6099
9771b263
DN
6100 vec_oprnds0.release ();
6101 vec_oprnds1.release ();
6102 vec_oprnds2.release ();
ebfd146a 6103
ebfd146a
IR
6104 return true;
6105}
6106
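/* Editor's illustration (not part of tree-vect-stmts.c): a loop that reaches
   the POINTER_DIFF_EXPR handling in vectorizable_operation.  The element-wise
   pointer subtraction is computed in the unsigned vector type and then
   VIEW_CONVERT_EXPRed to the signed result type, as described above.  */

#include <stddef.h>

void
pointer_diffs (ptrdiff_t *restrict d, char **restrict p, char **restrict q,
	       int n)
{
  for (int i = 0; i < n; i++)
    d[i] = p[i] - q[i];
}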
f702e7d4 6107/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
6108
6109static void
f702e7d4 6110ensure_base_align (struct data_reference *dr)
c716e67f 6111{
ca823c85 6112 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
c716e67f
XDL
6113 return;
6114
52639a61 6115 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 6116 {
52639a61 6117 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 6118
f702e7d4
RS
6119 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6120
428f0c67 6121 if (decl_in_symtab_p (base_decl))
f702e7d4 6122 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6123 else
6124 {
f702e7d4 6125 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6126 DECL_USER_ALIGN (base_decl) = 1;
6127 }
52639a61 6128 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6129 }
6130}
6131
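/* Editor's sketch (not the actual transformation): raising the base
   declaration's alignment to DR_TARGET_ALIGNMENT is roughly equivalent to the
   programmer having written the aligned variant below; 16 is an assumed value
   for a 128-bit vector target.  */

static float data[1024];				/* ABI alignment only */
static float data_aligned[1024] __attribute__ ((aligned (16)));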
ebfd146a 6132
44fc7854
BE
6133/* Function get_group_alias_ptr_type.
6134
6135 Return the alias type for the group starting at FIRST_STMT. */
6136
6137static tree
6138get_group_alias_ptr_type (gimple *first_stmt)
6139{
6140 struct data_reference *first_dr, *next_dr;
6141 gimple *next_stmt;
6142
6143 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6144 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
44fc7854
BE
6145 while (next_stmt)
6146 {
6147 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6148 if (get_alias_set (DR_REF (first_dr))
6149 != get_alias_set (DR_REF (next_dr)))
6150 {
6151 if (dump_enabled_p ())
6152 dump_printf_loc (MSG_NOTE, vect_location,
6153 "conflicting alias set types.\n");
6154 return ptr_type_node;
6155 }
2c53b149 6156 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
44fc7854
BE
6157 }
6158 return reference_alias_ptr_type (DR_REF (first_dr));
6159}
6160
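/* Editor's illustration (not part of tree-vect-stmts.c): the interleaved
   store group below writes an int member and a float member, whose alias
   sets differ, so get_group_alias_ptr_type falls back to ptr_type_node for
   the whole group.  */

struct int_float { int i; float f; };

void
fill_pairs (struct int_float *restrict p, int n)
{
  for (int k = 0; k < n; k++)
    {
      p[k].i = k;
      p[k].f = 1.0f;
    }
}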
6161
ebfd146a
IR
6162/* Function vectorizable_store.
6163
b8698a0f
L
 6164    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6165 can be vectorized.
6166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6167 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6169
6170static bool
355fe088 6171vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 6172 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 6173{
ebfd146a
IR
6174 tree data_ref;
6175 tree op;
6176 tree vec_oprnd = NULL_TREE;
6177 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6178 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6179 tree elem_type;
ebfd146a 6180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6181 struct loop *loop = NULL;
ef4bddc2 6182 machine_mode vec_mode;
ebfd146a
IR
6183 tree dummy;
6184 enum dr_alignment_support alignment_support_scheme;
929b4411
RS
6185 enum vect_def_type rhs_dt = vect_unknown_def_type;
6186 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6187 stmt_vec_info prev_stmt_info = NULL;
6188 tree dataref_ptr = NULL_TREE;
74bf76ed 6189 tree dataref_offset = NULL_TREE;
355fe088 6190 gimple *ptr_incr = NULL;
ebfd146a
IR
6191 int ncopies;
6192 int j;
2de001ee
RS
6193 gimple *next_stmt, *first_stmt;
6194 bool grouped_store;
ebfd146a 6195 unsigned int group_size, i;
6e1aa848
DN
6196 vec<tree> oprnds = vNULL;
6197 vec<tree> result_chain = vNULL;
ebfd146a 6198 bool inv_p;
09dfa495 6199 tree offset = NULL_TREE;
6e1aa848 6200 vec<tree> vec_oprnds = vNULL;
ebfd146a 6201 bool slp = (slp_node != NULL);
ebfd146a 6202 unsigned int vec_num;
a70d6342 6203 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6204 vec_info *vinfo = stmt_info->vinfo;
272c6793 6205 tree aggr_type;
134c85ca 6206 gather_scatter_info gs_info;
355fe088 6207 gimple *new_stmt;
d9f21f6a 6208 poly_uint64 vf;
2de001ee 6209 vec_load_store_type vls_type;
44fc7854 6210 tree ref_type;
a70d6342 6211
a70d6342 6212 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6213 return false;
6214
66c16fd9
RB
6215 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6216 && ! vec_stmt)
ebfd146a
IR
6217 return false;
6218
6219 /* Is vectorizable store? */
6220
c3a8f964
RS
6221 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6222 if (is_gimple_assign (stmt))
6223 {
6224 tree scalar_dest = gimple_assign_lhs (stmt);
6225 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6226 && is_pattern_stmt_p (stmt_info))
6227 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6228 if (TREE_CODE (scalar_dest) != ARRAY_REF
6229 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6230 && TREE_CODE (scalar_dest) != INDIRECT_REF
6231 && TREE_CODE (scalar_dest) != COMPONENT_REF
6232 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6233 && TREE_CODE (scalar_dest) != REALPART_EXPR
6234 && TREE_CODE (scalar_dest) != MEM_REF)
6235 return false;
6236 }
6237 else
6238 {
6239 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6240 if (!call || !gimple_call_internal_p (call))
6241 return false;
6242
6243 internal_fn ifn = gimple_call_internal_fn (call);
6244 if (!internal_store_fn_p (ifn))
c3a8f964 6245 return false;
ebfd146a 6246
c3a8f964
RS
6247 if (slp_node != NULL)
6248 {
6249 if (dump_enabled_p ())
6250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6251 "SLP of masked stores not supported.\n");
6252 return false;
6253 }
6254
f307441a
RS
6255 int mask_index = internal_fn_mask_index (ifn);
6256 if (mask_index >= 0)
6257 {
6258 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6259 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6260 &mask_vectype))
f307441a
RS
6261 return false;
6262 }
c3a8f964
RS
6263 }
6264
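       /* Editor's note: a store reaches this point either as a plain GIMPLE
	  assignment or as an internal-function call such as IFN_MASK_STORE
	  (any function for which internal_store_fn_p is true); in the latter
	  case the mask operand extracted above must itself be vectorizable.  */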
6265 op = vect_get_store_rhs (stmt);
ebfd146a 6266
fce57248
RS
6267 /* Cannot have hybrid store SLP -- that would mean storing to the
6268 same location twice. */
6269 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6270
f4d09712 6271 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6272 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6273
6274 if (loop_vinfo)
b17dc4d4
RB
6275 {
6276 loop = LOOP_VINFO_LOOP (loop_vinfo);
6277 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6278 }
6279 else
6280 vf = 1;
6281
6282 /* Multiple types in SLP are handled by creating the appropriate number of
6283 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6284 case of SLP. */
fce57248 6285 if (slp)
465c8c19
JJ
6286 ncopies = 1;
6287 else
e8f142e2 6288 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6289
6290 gcc_assert (ncopies >= 1);
6291
6292 /* FORNOW. This restriction should be relaxed. */
6293 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6294 {
6295 if (dump_enabled_p ())
6296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6297 "multiple types in nested loop.\n");
6298 return false;
6299 }
6300
929b4411 6301 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6302 return false;
6303
272c6793 6304 elem_type = TREE_TYPE (vectype);
ebfd146a 6305 vec_mode = TYPE_MODE (vectype);
7b7b1813 6306
ebfd146a
IR
6307 if (!STMT_VINFO_DATA_REF (stmt_info))
6308 return false;
6309
2de001ee 6310 vect_memory_access_type memory_access_type;
7e11fc7f 6311 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6312 &memory_access_type, &gs_info))
6313 return false;
3bab6342 6314
c3a8f964
RS
6315 if (mask)
6316 {
7e11fc7f
RS
6317 if (memory_access_type == VMAT_CONTIGUOUS)
6318 {
6319 if (!VECTOR_MODE_P (vec_mode)
6320 || !can_vec_mask_load_store_p (vec_mode,
6321 TYPE_MODE (mask_vectype), false))
6322 return false;
6323 }
f307441a
RS
6324 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6325 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6326 {
6327 if (dump_enabled_p ())
6328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6329 "unsupported access type for masked store.\n");
6330 return false;
6331 }
c3a8f964
RS
6332 }
6333 else
6334 {
6335 /* FORNOW. In some cases can vectorize even if data-type not supported
6336 (e.g. - array initialization with 0). */
6337 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6338 return false;
6339 }
6340
f307441a 6341 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6342 && memory_access_type != VMAT_GATHER_SCATTER
6343 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6344 if (grouped_store)
6345 {
2c53b149 6346 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7cfb4d93 6347 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6348 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7cfb4d93
RS
6349 }
6350 else
6351 {
6352 first_stmt = stmt;
6353 first_dr = dr;
6354 group_size = vec_num = 1;
6355 }
6356
ebfd146a
IR
6357 if (!vec_stmt) /* transformation not required. */
6358 {
2de001ee 6359 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6360
6361 if (loop_vinfo
6362 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6363 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6364 memory_access_type, &gs_info);
7cfb4d93 6365
ebfd146a 6366 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6367 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6368 vls_type, slp_node, cost_vec);
ebfd146a
IR
6369 return true;
6370 }
2de001ee 6371 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6372
67b8dbac 6373 /* Transform. */
ebfd146a 6374
f702e7d4 6375 ensure_base_align (dr);
c716e67f 6376
f307441a 6377 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6378 {
c3a8f964 6379 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6380 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6381 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6382 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6383 edge pe = loop_preheader_edge (loop);
6384 gimple_seq seq;
6385 basic_block new_bb;
6386 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6387 poly_uint64 scatter_off_nunits
6388 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6389
4d694b27 6390 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6391 modifier = NONE;
4d694b27 6392 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6393 {
3bab6342
AT
6394 modifier = WIDEN;
6395
6396 /* Currently gathers and scatters are only supported for
6397 fixed-length vectors. */
6398 unsigned int count = scatter_off_nunits.to_constant ();
6399 vec_perm_builder sel (count, count, 1);
6400 for (i = 0; i < (unsigned int) count; ++i)
6401 sel.quick_push (i | (count / 2));
3bab6342 6402
4d694b27 6403 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6404 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6405 indices);
3bab6342
AT
6406 gcc_assert (perm_mask != NULL_TREE);
6407 }
4d694b27 6408 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6409 {
3bab6342
AT
6410 modifier = NARROW;
6411
6412 /* Currently gathers and scatters are only supported for
6413 fixed-length vectors. */
6414 unsigned int count = nunits.to_constant ();
6415 vec_perm_builder sel (count, count, 1);
6416 for (i = 0; i < (unsigned int) count; ++i)
6417 sel.quick_push (i | (count / 2));
3bab6342 6418
4d694b27 6419 vec_perm_indices indices (sel, 2, count);
e3342de4 6420 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6421 gcc_assert (perm_mask != NULL_TREE);
6422 ncopies *= 2;
6423 }
6424 else
6425 gcc_unreachable ();
6426
134c85ca 6427 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6428 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6429 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6430 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6431 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6432 scaletype = TREE_VALUE (arglist);
6433
6434 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6435 && TREE_CODE (rettype) == VOID_TYPE);
6436
134c85ca 6437 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6438 if (!is_gimple_min_invariant (ptr))
6439 {
6440 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6441 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6442 gcc_assert (!new_bb);
6443 }
6444
6445 /* Currently we support only unconditional scatter stores,
6446 so mask should be all ones. */
6447 mask = build_int_cst (masktype, -1);
6448 mask = vect_init_vector (stmt, mask, masktype, NULL);
6449
134c85ca 6450 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6451
6452 prev_stmt_info = NULL;
6453 for (j = 0; j < ncopies; ++j)
6454 {
6455 if (j == 0)
6456 {
6457 src = vec_oprnd1
c3a8f964 6458 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6459 op = vec_oprnd0
134c85ca 6460 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6461 }
6462 else if (modifier != NONE && (j & 1))
6463 {
6464 if (modifier == WIDEN)
6465 {
6466 src = vec_oprnd1
929b4411 6467 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6468 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6469 stmt, gsi);
6470 }
6471 else if (modifier == NARROW)
6472 {
6473 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6474 stmt, gsi);
6475 op = vec_oprnd0
134c85ca
RS
6476 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6477 vec_oprnd0);
3bab6342
AT
6478 }
6479 else
6480 gcc_unreachable ();
6481 }
6482 else
6483 {
6484 src = vec_oprnd1
929b4411 6485 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6486 op = vec_oprnd0
134c85ca
RS
6487 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6488 vec_oprnd0);
3bab6342
AT
6489 }
6490
6491 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6492 {
928686b1
RS
6493 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6494 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6495 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6496 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6497 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6499 src = var;
6500 }
6501
6502 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6503 {
928686b1
RS
6504 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6505 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6506 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6507 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6508 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6509 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6510 op = var;
6511 }
6512
6513 new_stmt
134c85ca 6514 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6515
6516 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6517
6518 if (prev_stmt_info == NULL)
6519 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6520 else
6521 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6522 prev_stmt_info = vinfo_for_stmt (new_stmt);
6523 }
6524 return true;
6525 }
6526
f307441a 6527 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6528 {
2c53b149
RB
6529 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6530 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
f307441a 6531 }
ebfd146a 6532
f307441a
RS
6533 if (grouped_store)
6534 {
ebfd146a 6535 /* FORNOW */
a70d6342 6536 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6537
6538 /* We vectorize all the stmts of the interleaving group when we
6539 reach the last stmt in the group. */
2c53b149
RB
6540 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6541 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6542 && !slp)
6543 {
6544 *vec_stmt = NULL;
6545 return true;
6546 }
6547
6548 if (slp)
4b5caab7 6549 {
0d0293ac 6550 grouped_store = false;
4b5caab7
IR
6551 /* VEC_NUM is the number of vect stmts to be created for this
6552 group. */
6553 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6554 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2c53b149 6555 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6556 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6557 op = vect_get_store_rhs (first_stmt);
4b5caab7 6558 }
ebfd146a 6559 else
4b5caab7
IR
6560 /* VEC_NUM is the number of vect stmts to be created for this
6561 group. */
ebfd146a 6562 vec_num = group_size;
44fc7854
BE
6563
6564 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6565 }
b8698a0f 6566 else
7cfb4d93 6567 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6568
73fbfcad 6569 if (dump_enabled_p ())
78c60e3d 6570 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6571 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6572
2de001ee
RS
6573 if (memory_access_type == VMAT_ELEMENTWISE
6574 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6575 {
6576 gimple_stmt_iterator incr_gsi;
6577 bool insert_after;
355fe088 6578 gimple *incr;
f2e2a985
MM
6579 tree offvar;
6580 tree ivstep;
6581 tree running_off;
f2e2a985
MM
6582 tree stride_base, stride_step, alias_off;
6583 tree vec_oprnd;
f502d50e 6584 unsigned int g;
4d694b27
RS
6585 /* Checked by get_load_store_type. */
6586 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6587
7cfb4d93 6588 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6589 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6590
6591 stride_base
6592 = fold_build_pointer_plus
b210f45f 6593 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6594 size_binop (PLUS_EXPR,
b210f45f 6595 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6596 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6597 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6598
6599 /* For a store with loop-invariant (but other than power-of-2)
6600 stride (i.e. not a grouped access) like so:
6601
6602 for (i = 0; i < n; i += stride)
6603 array[i] = ...;
6604
6605 we generate a new induction variable and new stores from
6606 the components of the (vectorized) rhs:
6607
6608 for (j = 0; ; j += VF*stride)
6609 vectemp = ...;
6610 tmp1 = vectemp[0];
6611 array[j] = tmp1;
6612 tmp2 = vectemp[1];
6613 array[j + stride] = tmp2;
6614 ...
6615 */
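 /* Illustrative sketch (example added for clarity, not a claim about a
    specific target): with a V4SI rhs and scalar stride 3, one copy emits
      array[j]     = vectemp[0];
      array[j + 3] = vectemp[1];
      array[j + 6] = vectemp[2];
      array[j + 9] = vectemp[3];
    and J advances by VF * stride = 12 per vector iteration.  */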
6616
4d694b27 6617 unsigned nstores = const_nunits;
b17dc4d4 6618 unsigned lnel = 1;
cee62fee 6619 tree ltype = elem_type;
04199738 6620 tree lvectype = vectype;
cee62fee
MM
6621 if (slp)
6622 {
4d694b27
RS
6623 if (group_size < const_nunits
6624 && const_nunits % group_size == 0)
b17dc4d4 6625 {
4d694b27 6626 nstores = const_nunits / group_size;
b17dc4d4
RB
6627 lnel = group_size;
6628 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6629 lvectype = vectype;
6630
6631 /* First check if vec_extract optab doesn't support extraction
6632 of vector elts directly. */
b397965c 6633 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6634 machine_mode vmode;
6635 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6636 || !VECTOR_MODE_P (vmode)
414fef4e 6637 || !targetm.vector_mode_supported_p (vmode)
6638 || (convert_optab_handler (vec_extract_optab,
6639 TYPE_MODE (vectype), vmode)
6640 == CODE_FOR_nothing))
6641 {
6642 /* Try to avoid emitting an extract of vector elements
6643 by performing the extracts using an integer type of the
6644 same size, extracting from a vector of those and then
6645 re-interpreting it as the original vector type if
6646 supported. */
6647 unsigned lsize
6648 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6649 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6650 unsigned int lnunits = const_nunits / group_size;
6651 /* If we can't construct such a vector fall back to
6652 element extracts from the original vector type and
6653 element size stores. */
4d694b27 6654 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6655 && VECTOR_MODE_P (vmode)
414fef4e 6656 && targetm.vector_mode_supported_p (vmode)
6657 && (convert_optab_handler (vec_extract_optab,
6658 vmode, elmode)
6659 != CODE_FOR_nothing))
6660 {
4d694b27 6661 nstores = lnunits;
04199738
RB
6662 lnel = group_size;
6663 ltype = build_nonstandard_integer_type (lsize, 1);
6664 lvectype = build_vector_type (ltype, nstores);
6665 }
6666 /* Else fall back to vector extraction anyway.
6667 Fewer stores are more important than avoiding spilling
6668 of the vector we extract from. Compared to the
 6669 construction case in vectorizable_load, no store-forwarding
6670 issue exists here for reasonable archs. */
6671 }
b17dc4d4 6672 }
4d694b27
RS
6673 else if (group_size >= const_nunits
6674 && group_size % const_nunits == 0)
b17dc4d4
RB
6675 {
6676 nstores = 1;
4d694b27 6677 lnel = const_nunits;
b17dc4d4 6678 ltype = vectype;
04199738 6679 lvectype = vectype;
b17dc4d4 6680 }
cee62fee
MM
6681 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6682 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6683 }
6684
f2e2a985
MM
6685 ivstep = stride_step;
6686 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6687 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6688
6689 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6690
b210f45f
RB
6691 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6692 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6693 create_iv (stride_base, ivstep, NULL,
6694 loop, &incr_gsi, insert_after,
6695 &offvar, NULL);
6696 incr = gsi_stmt (incr_gsi);
310213d4 6697 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985 6698
b210f45f 6699 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6700
6701 prev_stmt_info = NULL;
44fc7854 6702 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6703 next_stmt = first_stmt;
6704 for (g = 0; g < group_size; g++)
f2e2a985 6705 {
f502d50e
MM
6706 running_off = offvar;
6707 if (g)
f2e2a985 6708 {
f502d50e
MM
6709 tree size = TYPE_SIZE_UNIT (ltype);
6710 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6711 size);
f502d50e 6712 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6713 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6714 running_off, pos);
f2e2a985 6715 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6716 running_off = newoff;
f502d50e 6717 }
b17dc4d4
RB
6718 unsigned int group_el = 0;
6719 unsigned HOST_WIDE_INT
6720 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6721 for (j = 0; j < ncopies; j++)
6722 {
c3a8f964 6723	      /* We've set op and rhs_dt above, from vect_get_store_rhs,
 6724	         and first_stmt == stmt.  */
6725 if (j == 0)
6726 {
6727 if (slp)
6728 {
6729 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6730 slp_node);
f502d50e
MM
6731 vec_oprnd = vec_oprnds[0];
6732 }
6733 else
6734 {
c3a8f964 6735 op = vect_get_store_rhs (next_stmt);
81c40241 6736 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6737 }
6738 }
f2e2a985 6739 else
f502d50e
MM
6740 {
6741 if (slp)
6742 vec_oprnd = vec_oprnds[j];
6743 else
c079cbac 6744 {
894dd753 6745 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411
RS
6746 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6747 vec_oprnd);
c079cbac 6748 }
f502d50e 6749 }
04199738
RB
6750 /* Pun the vector to extract from if necessary. */
6751 if (lvectype != vectype)
6752 {
6753 tree tem = make_ssa_name (lvectype);
6754 gimple *pun
6755 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6756 lvectype, vec_oprnd));
6757 vect_finish_stmt_generation (stmt, pun, gsi);
6758 vec_oprnd = tem;
6759 }
f502d50e
MM
6760 for (i = 0; i < nstores; i++)
6761 {
6762 tree newref, newoff;
355fe088 6763 gimple *incr, *assign;
f502d50e
MM
6764 tree size = TYPE_SIZE (ltype);
6765 /* Extract the i'th component. */
6766 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6767 bitsize_int (i), size);
6768 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6769 size, pos);
6770
6771 elem = force_gimple_operand_gsi (gsi, elem, true,
6772 NULL_TREE, true,
6773 GSI_SAME_STMT);
6774
b17dc4d4
RB
6775 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6776 group_el * elsz);
f502d50e 6777 newref = build2 (MEM_REF, ltype,
b17dc4d4 6778 running_off, this_off);
19986382 6779 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6780
6781 /* And store it to *running_off. */
6782 assign = gimple_build_assign (newref, elem);
6783 vect_finish_stmt_generation (stmt, assign, gsi);
6784
b17dc4d4
RB
6785 group_el += lnel;
6786 if (! slp
6787 || group_el == group_size)
6788 {
6789 newoff = copy_ssa_name (running_off, NULL);
6790 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6791 running_off, stride_step);
6792 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6793
b17dc4d4
RB
6794 running_off = newoff;
6795 group_el = 0;
6796 }
225ce44b
RB
6797 if (g == group_size - 1
6798 && !slp)
f502d50e
MM
6799 {
6800 if (j == 0 && i == 0)
225ce44b
RB
6801 STMT_VINFO_VEC_STMT (stmt_info)
6802 = *vec_stmt = assign;
f502d50e
MM
6803 else
6804 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6805 prev_stmt_info = vinfo_for_stmt (assign);
6806 }
6807 }
f2e2a985 6808 }
2c53b149 6809 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6810 if (slp)
6811 break;
f2e2a985 6812 }
778dd3b6
RB
6813
6814 vec_oprnds.release ();
f2e2a985
MM
6815 return true;
6816 }
6817
8c681247 6818 auto_vec<tree> dr_chain (group_size);
9771b263 6819 oprnds.create (group_size);
ebfd146a 6820
720f5239 6821 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6822 gcc_assert (alignment_support_scheme);
70088b95
RS
6823 vec_loop_masks *loop_masks
6824 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6825 ? &LOOP_VINFO_MASKS (loop_vinfo)
6826 : NULL);
272c6793 6827 /* Targets with store-lane instructions must not require explicit
6828 realignment. vect_supportable_dr_alignment always returns either
6829 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6830 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6831 && !mask
70088b95 6832 && !loop_masks)
272c6793
RS
6833 || alignment_support_scheme == dr_aligned
6834 || alignment_support_scheme == dr_unaligned_supported);
6835
62da9e14
RS
6836 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6837 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6838 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
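 /* Illustrative note: for a downward or reversed contiguous access the
    data-ref pointer is biased back by NUNITS - 1 elements so that the
    (forward) vector access covers the same elements the scalar code
    reaches with its negative step.  */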
6839
f307441a
RS
6840 tree bump;
6841 tree vec_offset = NULL_TREE;
6842 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6843 {
6844 aggr_type = NULL_TREE;
6845 bump = NULL_TREE;
6846 }
6847 else if (memory_access_type == VMAT_GATHER_SCATTER)
6848 {
6849 aggr_type = elem_type;
6850 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6851 &bump, &vec_offset);
6852 }
272c6793 6853 else
f307441a
RS
6854 {
6855 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6856 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6857 else
6858 aggr_type = vectype;
6859 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6860 }
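 /* Illustrative note: for VMAT_LOAD_STORE_LANES the aggregate type is an
    array of VEC_NUM * NUNITS scalar elements, so a single store-lanes
    call per copy covers the whole interleaved group; for the other
    contiguous cases the pointer is simply bumped by one vector at a
    time.  */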
ebfd146a 6861
6862 if (mask)
6863 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6864
6865 /* In case the vectorization factor (VF) is bigger than the number
6866 of elements that we can fit in a vectype (nunits), we have to generate
6867 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 6868 vector stmt by a factor VF/nunits. For more details see documentation in
6869 vect_get_vec_def_for_copy_stmt. */
6870
0d0293ac 6871 /* In case of interleaving (non-unit grouped access):
6872
6873 S1: &base + 2 = x2
6874 S2: &base = x0
6875 S3: &base + 1 = x1
6876 S4: &base + 3 = x3
6877
6878 We create vectorized stores starting from base address (the access of the
 6879 first stmt in the chain, S2 in the above example), when the last store stmt
6880 of the chain (S4) is reached:
6881
6882 VS1: &base = vx2
6883 VS2: &base + vec_size*1 = vx0
6884 VS3: &base + vec_size*2 = vx1
6885 VS4: &base + vec_size*3 = vx3
6886
6887 Then permutation statements are generated:
6888
6889 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6890 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6891 ...
b8698a0f 6892
6893 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6894 (the order of the data-refs in the output of vect_permute_store_chain
6895 corresponds to the order of scalar stmts in the interleaving chain - see
6896 the documentation of vect_permute_store_chain()).
6897
6898 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6899 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6900 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6901 STMT_VINFO_RELATED_STMT for the next copies.
6902 */
6903
6904 prev_stmt_info = NULL;
c3a8f964 6905 tree vec_mask = NULL_TREE;
ebfd146a
IR
6906 for (j = 0; j < ncopies; j++)
6907 {
ebfd146a
IR
6908
6909 if (j == 0)
6910 {
6911 if (slp)
6912 {
6913 /* Get vectorized arguments for SLP_NODE. */
d092494c 6914 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6915 NULL, slp_node);
ebfd146a 6916
9771b263 6917 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6918 }
6919 else
6920 {
6921 /* For interleaved stores we collect vectorized defs for all the
6922 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6923 used as an input to vect_permute_store_chain(), and OPRNDS as
6924 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6925
2c53b149 6926 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6927 OPRNDS are of size 1. */
b8698a0f 6928 next_stmt = first_stmt;
ebfd146a
IR
6929 for (i = 0; i < group_size; i++)
6930 {
b8698a0f 6931 /* Since gaps are not supported for interleaved stores,
2c53b149 6932 DR_GROUP_SIZE is the exact number of stmts in the chain.
b8698a0f 6933 Therefore, NEXT_STMT can't be NULL_TREE. In case that
2c53b149 6934 there is no interleaving, DR_GROUP_SIZE is 1, and only one
ebfd146a 6935 iteration of the loop will be executed. */
c3a8f964 6936 op = vect_get_store_rhs (next_stmt);
81c40241 6937 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6938 dr_chain.quick_push (vec_oprnd);
6939 oprnds.quick_push (vec_oprnd);
2c53b149 6940 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6941 }
c3a8f964
RS
6942 if (mask)
6943 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6944 mask_vectype);
ebfd146a
IR
6945 }
6946
 6947	  /* We should have caught mismatched types earlier.  */
6948 gcc_assert (useless_type_conversion_p (vectype,
6949 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6950 bool simd_lane_access_p
6951 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6952 if (simd_lane_access_p
6953 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6954 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6955 && integer_zerop (DR_OFFSET (first_dr))
6956 && integer_zerop (DR_INIT (first_dr))
6957 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6958 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6959 {
6960 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6961 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6962 inv_p = false;
74bf76ed 6963 }
f307441a
RS
6964 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6965 {
6966 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6967 &dataref_ptr, &vec_offset);
6968 inv_p = false;
6969 }
74bf76ed
JJ
6970 else
6971 dataref_ptr
6972 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6973 simd_lane_access_p ? loop : NULL,
09dfa495 6974 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
6975 simd_lane_access_p, &inv_p,
6976 NULL_TREE, bump);
a70d6342 6977 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6978 }
b8698a0f 6979 else
ebfd146a 6980 {
6981 /* For interleaved stores we created vectorized defs for all the
6982 defs stored in OPRNDS in the previous iteration (previous copy).
6983 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6984 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6985 next copy.
2c53b149 6986 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6987 OPRNDS are of size 1. */
6988 for (i = 0; i < group_size; i++)
6989 {
9771b263 6990 op = oprnds[i];
894dd753 6991 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411 6992 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
6993 dr_chain[i] = vec_oprnd;
6994 oprnds[i] = vec_oprnd;
ebfd146a 6995 }
c3a8f964 6996 if (mask)
929b4411 6997 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
6998 if (dataref_offset)
6999 dataref_offset
f307441a
RS
7000 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7001 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
7002 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7003 vec_offset);
74bf76ed
JJ
7004 else
7005 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 7006 bump);
ebfd146a
IR
7007 }
7008
2de001ee 7009 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7010 {
272c6793 7011 tree vec_array;
267d3070 7012
3ba4ff41 7013 /* Get an array into which we can store the individual vectors. */
272c6793 7014 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7015
7016 /* Invalidate the current contents of VEC_ARRAY. This should
7017 become an RTL clobber too, which prevents the vector registers
7018 from being upward-exposed. */
7019 vect_clobber_variable (stmt, gsi, vec_array);
7020
7021 /* Store the individual vectors into the array. */
272c6793 7022 for (i = 0; i < vec_num; i++)
c2d7ab2a 7023 {
9771b263 7024 vec_oprnd = dr_chain[i];
272c6793 7025 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 7026 }
b8698a0f 7027
7cfb4d93 7028 tree final_mask = NULL;
70088b95
RS
7029 if (loop_masks)
7030 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7031 vectype, j);
7cfb4d93
RS
7032 if (vec_mask)
7033 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7034 vec_mask, gsi);
7035
7e11fc7f 7036 gcall *call;
7cfb4d93 7037 if (final_mask)
7038 {
7039 /* Emit:
7040 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7041 VEC_ARRAY). */
7042 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7043 tree alias_ptr = build_int_cst (ref_type, align);
7044 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7045 dataref_ptr, alias_ptr,
7cfb4d93 7046 final_mask, vec_array);
7047 }
7048 else
7049 {
7050 /* Emit:
7051 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7052 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7053 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7054 vec_array);
7055 gimple_call_set_lhs (call, data_ref);
7056 }
a844293d
RS
7057 gimple_call_set_nothrow (call, true);
7058 new_stmt = call;
267d3070 7059 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41
RS
7060
7061 /* Record that VEC_ARRAY is now dead. */
7062 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
7063 }
7064 else
7065 {
7066 new_stmt = NULL;
0d0293ac 7067 if (grouped_store)
272c6793 7068 {
b6b9227d
JJ
7069 if (j == 0)
7070 result_chain.create (group_size);
272c6793
RS
7071 /* Permute. */
7072 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7073 &result_chain);
7074 }
c2d7ab2a 7075
272c6793
RS
7076 next_stmt = first_stmt;
7077 for (i = 0; i < vec_num; i++)
7078 {
644ffefd 7079 unsigned align, misalign;
272c6793 7080
7cfb4d93 7081 tree final_mask = NULL_TREE;
70088b95
RS
7082 if (loop_masks)
7083 final_mask = vect_get_loop_mask (gsi, loop_masks,
7084 vec_num * ncopies,
7cfb4d93
RS
7085 vectype, vec_num * j + i);
7086 if (vec_mask)
7087 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7088 vec_mask, gsi);
7089
f307441a
RS
7090 if (memory_access_type == VMAT_GATHER_SCATTER)
7091 {
7092 tree scale = size_int (gs_info.scale);
7093 gcall *call;
70088b95 7094 if (loop_masks)
f307441a
RS
7095 call = gimple_build_call_internal
7096 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7097 scale, vec_oprnd, final_mask);
7098 else
7099 call = gimple_build_call_internal
7100 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7101 scale, vec_oprnd);
7102 gimple_call_set_nothrow (call, true);
7103 new_stmt = call;
7104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7105 break;
7106 }
7107
272c6793
RS
7108 if (i > 0)
7109 /* Bump the vector pointer. */
7110 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7111 stmt, bump);
272c6793
RS
7112
7113 if (slp)
9771b263 7114 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7115 else if (grouped_store)
7116 /* For grouped stores vectorized defs are interleaved in
272c6793 7117 vect_permute_store_chain(). */
9771b263 7118 vec_oprnd = result_chain[i];
272c6793 7119
f702e7d4 7120 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7121 if (aligned_access_p (first_dr))
644ffefd 7122 misalign = 0;
272c6793
RS
7123 else if (DR_MISALIGNMENT (first_dr) == -1)
7124 {
25f68d90 7125 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7126 misalign = 0;
272c6793
RS
7127 }
7128 else
c3a8f964 7129 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7130 if (dataref_offset == NULL_TREE
7131 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7132 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7133 misalign);
c2d7ab2a 7134
62da9e14 7135 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7136 {
7137 tree perm_mask = perm_mask_for_reverse (vectype);
7138 tree perm_dest
c3a8f964 7139 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7140 vectype);
b731b390 7141 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7142
7143 /* Generate the permute statement. */
355fe088 7144 gimple *perm_stmt
0d0e4a03
JJ
7145 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7146 vec_oprnd, perm_mask);
09dfa495
BM
7147 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7148
7149 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7150 vec_oprnd = new_temp;
7151 }
7152
272c6793 7153 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7154 if (final_mask)
c3a8f964
RS
7155 {
7156 align = least_bit_hwi (misalign | align);
7157 tree ptr = build_int_cst (ref_type, align);
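 /* Note (my reading of the convention, not stated here): the second
    argument of IFN_MASK_STORE appears to serve double duty -- its type
    carries the alias information (REF_TYPE) and its value the known
    alignment of the access in bytes.  */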
7158 gcall *call
7159 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7160 dataref_ptr, ptr,
7cfb4d93 7161 final_mask, vec_oprnd);
c3a8f964
RS
7162 gimple_call_set_nothrow (call, true);
7163 new_stmt = call;
7164 }
7165 else
7166 {
7167 data_ref = fold_build2 (MEM_REF, vectype,
7168 dataref_ptr,
7169 dataref_offset
7170 ? dataref_offset
7171 : build_int_cst (ref_type, 0));
7172 if (aligned_access_p (first_dr))
7173 ;
7174 else if (DR_MISALIGNMENT (first_dr) == -1)
7175 TREE_TYPE (data_ref)
7176 = build_aligned_type (TREE_TYPE (data_ref),
7177 align * BITS_PER_UNIT);
7178 else
7179 TREE_TYPE (data_ref)
7180 = build_aligned_type (TREE_TYPE (data_ref),
7181 TYPE_ALIGN (elem_type));
19986382 7182 vect_copy_ref_info (data_ref, DR_REF (first_dr));
c3a8f964
RS
7183 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7184 }
272c6793 7185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
7186
7187 if (slp)
7188 continue;
7189
2c53b149 7190 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
7191 if (!next_stmt)
7192 break;
7193 }
ebfd146a 7194 }
1da0876c
RS
7195 if (!slp)
7196 {
7197 if (j == 0)
7198 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7199 else
7200 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7201 prev_stmt_info = vinfo_for_stmt (new_stmt);
7202 }
ebfd146a
IR
7203 }
7204
9771b263
DN
7205 oprnds.release ();
7206 result_chain.release ();
7207 vec_oprnds.release ();
ebfd146a
IR
7208
7209 return true;
7210}
7211
7212/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7213 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7214 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7215 vect_gen_perm_mask_checked. */
a1e53f3f 7216
3fcc1b55 7217tree
4aae3cb3 7218vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7219{
b00cb3bf 7220 tree mask_type;
a1e53f3f 7221
0ecc2b7d
RS
7222 poly_uint64 nunits = sel.length ();
7223 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7224
7225 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7226 return vec_perm_indices_to_tree (mask_type, sel);
7227}
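/* Usage sketch (illustration only): to build a reversal mask for a
   four-element vector the caller would pass SEL = {3, 2, 1, 0} and get
   back the ssizetype VECTOR_CST {3, 2, 1, 0}, suitable as the third
   operand of a VEC_PERM_EXPR.  */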
7228
7ac7e286 7229/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7230 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7231
7232tree
4aae3cb3 7233vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7234{
7ac7e286 7235 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7236 return vect_gen_perm_mask_any (vectype, sel);
7237}
7238
7239 /* Given vector variables X and Y that were generated for the scalar
 7240 STMT, generate instructions to permute the vector elements of X and Y
7241 using permutation mask MASK_VEC, insert them at *GSI and return the
7242 permuted vector variable. */
a1e53f3f
L
7243
7244static tree
355fe088 7245permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7246 gimple_stmt_iterator *gsi)
a1e53f3f
L
7247{
7248 tree vectype = TREE_TYPE (x);
aec7ae7d 7249 tree perm_dest, data_ref;
355fe088 7250 gimple *perm_stmt;
a1e53f3f 7251
7ad429a4
RS
7252 tree scalar_dest = gimple_get_lhs (stmt);
7253 if (TREE_CODE (scalar_dest) == SSA_NAME)
7254 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7255 else
7256 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7257 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7258
7259 /* Generate the permute statement. */
0d0e4a03 7260 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7261 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7262
7263 return data_ref;
7264}
7265
7266/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 7267 inserting them on the loop's preheader edge. Returns true if we
7268 were successful in doing so (and thus STMT can be moved then),
7269 otherwise returns false. */
7270
7271static bool
355fe088 7272hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7273{
7274 ssa_op_iter i;
7275 tree op;
7276 bool any = false;
7277
7278 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7279 {
355fe088 7280 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7281 if (!gimple_nop_p (def_stmt)
7282 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7283 {
7284 /* Make sure we don't need to recurse. While we could do
 7285 so in simple cases, when there are more complex use webs
7286 we don't have an easy way to preserve stmt order to fulfil
7287 dependencies within them. */
7288 tree op2;
7289 ssa_op_iter i2;
d1417442
JJ
7290 if (gimple_code (def_stmt) == GIMPLE_PHI)
7291 return false;
6b916b36
RB
7292 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7293 {
355fe088 7294 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7295 if (!gimple_nop_p (def_stmt2)
7296 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7297 return false;
7298 }
7299 any = true;
7300 }
7301 }
7302
7303 if (!any)
7304 return true;
7305
7306 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7307 {
355fe088 7308 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7309 if (!gimple_nop_p (def_stmt)
7310 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7311 {
7312 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7313 gsi_remove (&gsi, false);
7314 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7315 }
7316 }
7317
7318 return true;
7319}
7320
ebfd146a
IR
7321/* vectorizable_load.
7322
 7323 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7324 can be vectorized.
7325 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
7326 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7327 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7328
7329static bool
355fe088 7330vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2
RB
7331 slp_tree slp_node, slp_instance slp_node_instance,
7332 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7333{
7334 tree scalar_dest;
7335 tree vec_dest = NULL;
7336 tree data_ref = NULL;
7337 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7338 stmt_vec_info prev_stmt_info;
ebfd146a 7339 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7340 struct loop *loop = NULL;
ebfd146a 7341 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7342 bool nested_in_vect_loop = false;
c716e67f 7343 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7344 tree elem_type;
ebfd146a 7345 tree new_temp;
ef4bddc2 7346 machine_mode mode;
355fe088 7347 gimple *new_stmt = NULL;
ebfd146a
IR
7348 tree dummy;
7349 enum dr_alignment_support alignment_support_scheme;
7350 tree dataref_ptr = NULL_TREE;
74bf76ed 7351 tree dataref_offset = NULL_TREE;
355fe088 7352 gimple *ptr_incr = NULL;
ebfd146a 7353 int ncopies;
4d694b27
RS
7354 int i, j;
7355 unsigned int group_size;
7356 poly_uint64 group_gap_adj;
ebfd146a
IR
7357 tree msq = NULL_TREE, lsq;
7358 tree offset = NULL_TREE;
356bbc4c 7359 tree byte_offset = NULL_TREE;
ebfd146a 7360 tree realignment_token = NULL_TREE;
538dd0b7 7361 gphi *phi = NULL;
6e1aa848 7362 vec<tree> dr_chain = vNULL;
0d0293ac 7363 bool grouped_load = false;
355fe088 7364 gimple *first_stmt;
4f0a0218 7365 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
7366 bool inv_p;
7367 bool compute_in_loop = false;
7368 struct loop *at_loop;
7369 int vec_num;
7370 bool slp = (slp_node != NULL);
7371 bool slp_perm = false;
a70d6342 7372 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7373 poly_uint64 vf;
272c6793 7374 tree aggr_type;
134c85ca 7375 gather_scatter_info gs_info;
310213d4 7376 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7377 tree ref_type;
929b4411 7378 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7379
465c8c19
JJ
7380 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7381 return false;
7382
66c16fd9
RB
7383 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7384 && ! vec_stmt)
465c8c19
JJ
7385 return false;
7386
c3a8f964
RS
7387 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7388 if (is_gimple_assign (stmt))
7389 {
7390 scalar_dest = gimple_assign_lhs (stmt);
7391 if (TREE_CODE (scalar_dest) != SSA_NAME)
7392 return false;
465c8c19 7393
c3a8f964
RS
7394 tree_code code = gimple_assign_rhs_code (stmt);
7395 if (code != ARRAY_REF
7396 && code != BIT_FIELD_REF
7397 && code != INDIRECT_REF
7398 && code != COMPONENT_REF
7399 && code != IMAGPART_EXPR
7400 && code != REALPART_EXPR
7401 && code != MEM_REF
7402 && TREE_CODE_CLASS (code) != tcc_declaration)
7403 return false;
7404 }
7405 else
7406 {
7407 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7408 if (!call || !gimple_call_internal_p (call))
7409 return false;
7410
7411 internal_fn ifn = gimple_call_internal_fn (call);
7412 if (!internal_load_fn_p (ifn))
c3a8f964 7413 return false;
465c8c19 7414
c3a8f964
RS
7415 scalar_dest = gimple_call_lhs (call);
7416 if (!scalar_dest)
7417 return false;
7418
7419 if (slp_node != NULL)
7420 {
7421 if (dump_enabled_p ())
7422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7423 "SLP of masked loads not supported.\n");
7424 return false;
7425 }
7426
bfaa08b7
RS
7427 int mask_index = internal_fn_mask_index (ifn);
7428 if (mask_index >= 0)
7429 {
7430 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7431 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7432 &mask_vectype))
bfaa08b7
RS
7433 return false;
7434 }
c3a8f964 7435 }
465c8c19
JJ
7436
7437 if (!STMT_VINFO_DATA_REF (stmt_info))
7438 return false;
7439
7440 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7441 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7442
a70d6342
IR
7443 if (loop_vinfo)
7444 {
7445 loop = LOOP_VINFO_LOOP (loop_vinfo);
7446 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7447 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7448 }
7449 else
3533e503 7450 vf = 1;
ebfd146a
IR
7451
7452 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7454 case of SLP. */
fce57248 7455 if (slp)
ebfd146a
IR
7456 ncopies = 1;
7457 else
e8f142e2 7458 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7459
7460 gcc_assert (ncopies >= 1);
7461
7462 /* FORNOW. This restriction should be relaxed. */
7463 if (nested_in_vect_loop && ncopies > 1)
7464 {
73fbfcad 7465 if (dump_enabled_p ())
78c60e3d 7466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7467 "multiple types in nested loop.\n");
ebfd146a
IR
7468 return false;
7469 }
7470
f2556b68
RB
7471 /* Invalidate assumptions made by dependence analysis when vectorization
7472 on the unrolled body effectively re-orders stmts. */
7473 if (ncopies > 1
7474 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7475 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7476 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7477 {
7478 if (dump_enabled_p ())
7479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7480 "cannot perform implicit CSE when unrolling "
7481 "with negative dependence distance\n");
7482 return false;
7483 }
7484
7b7b1813 7485 elem_type = TREE_TYPE (vectype);
947131ba 7486 mode = TYPE_MODE (vectype);
ebfd146a
IR
7487
7488 /* FORNOW. In some cases can vectorize even if data-type not supported
7489 (e.g. - data copies). */
947131ba 7490 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7491 {
73fbfcad 7492 if (dump_enabled_p ())
78c60e3d 7493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7494 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7495 return false;
7496 }
7497
ebfd146a 7498 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7499 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7500 {
0d0293ac 7501 grouped_load = true;
ebfd146a 7502 /* FORNOW */
2de001ee
RS
7503 gcc_assert (!nested_in_vect_loop);
7504 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7505
2c53b149
RB
7506 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7507 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7508
b1af7da6
RB
7509 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7510 slp_perm = true;
7511
f2556b68
RB
7512 /* Invalidate assumptions made by dependence analysis when vectorization
7513 on the unrolled body effectively re-orders stmts. */
7514 if (!PURE_SLP_STMT (stmt_info)
7515 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7516 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7517 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7518 {
7519 if (dump_enabled_p ())
7520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7521 "cannot perform implicit CSE when performing "
7522 "group loads with negative dependence distance\n");
7523 return false;
7524 }
96bb56b2
RB
7525
7526 /* Similarly when the stmt is a load that is both part of a SLP
7527 instance and a loop vectorized stmt via the same-dr mechanism
7528 we have to give up. */
2c53b149 7529 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2
RB
7530 && (STMT_SLP_TYPE (stmt_info)
7531 != STMT_SLP_TYPE (vinfo_for_stmt
2c53b149 7532 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
96bb56b2
RB
7533 {
7534 if (dump_enabled_p ())
7535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7536 "conflicting SLP types for CSEd load\n");
7537 return false;
7538 }
ebfd146a 7539 }
7cfb4d93
RS
7540 else
7541 group_size = 1;
ebfd146a 7542
2de001ee 7543 vect_memory_access_type memory_access_type;
7e11fc7f 7544 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7545 &memory_access_type, &gs_info))
7546 return false;
a1e53f3f 7547
c3a8f964
RS
7548 if (mask)
7549 {
7550 if (memory_access_type == VMAT_CONTIGUOUS)
7551 {
7e11fc7f
RS
7552 machine_mode vec_mode = TYPE_MODE (vectype);
7553 if (!VECTOR_MODE_P (vec_mode)
7554 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7555 TYPE_MODE (mask_vectype), true))
7556 return false;
7557 }
bfaa08b7 7558 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7559 {
7560 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7561 tree masktype
7562 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7563 if (TREE_CODE (masktype) == INTEGER_TYPE)
7564 {
7565 if (dump_enabled_p ())
7566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7567 "masked gather with integer mask not"
7568 " supported.");
7569 return false;
7570 }
7571 }
bfaa08b7
RS
7572 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7573 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7574 {
7575 if (dump_enabled_p ())
7576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7577 "unsupported access type for masked load.\n");
7578 return false;
7579 }
7580 }
7581
ebfd146a
IR
7582 if (!vec_stmt) /* transformation not required. */
7583 {
2de001ee
RS
7584 if (!slp)
7585 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7586
7587 if (loop_vinfo
7588 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7589 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7590 memory_access_type, &gs_info);
7cfb4d93 7591
ebfd146a 7592 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7593 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7594 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7595 return true;
7596 }
7597
2de001ee
RS
7598 if (!slp)
7599 gcc_assert (memory_access_type
7600 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7601
73fbfcad 7602 if (dump_enabled_p ())
78c60e3d 7603 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7604 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7605
67b8dbac 7606 /* Transform. */
ebfd146a 7607
f702e7d4 7608 ensure_base_align (dr);
c716e67f 7609
bfaa08b7 7610 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7611 {
929b4411
RS
7612 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7613 mask_dt);
aec7ae7d
JJ
7614 return true;
7615 }
2de001ee
RS
7616
7617 if (memory_access_type == VMAT_ELEMENTWISE
7618 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7619 {
7620 gimple_stmt_iterator incr_gsi;
7621 bool insert_after;
355fe088 7622 gimple *incr;
7d75abc8 7623 tree offvar;
7d75abc8
MM
7624 tree ivstep;
7625 tree running_off;
9771b263 7626 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7627 tree stride_base, stride_step, alias_off;
4d694b27
RS
7628 /* Checked by get_load_store_type. */
7629 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7630 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7631
7cfb4d93 7632 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7633 gcc_assert (!nested_in_vect_loop);
7d75abc8 7634
b210f45f 7635 if (grouped_load)
44fc7854 7636 {
2c53b149 7637 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7638 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
44fc7854 7639 }
ab313a8c 7640 else
44fc7854
BE
7641 {
7642 first_stmt = stmt;
7643 first_dr = dr;
b210f45f
RB
7644 }
7645 if (slp && grouped_load)
7646 {
2c53b149 7647 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
b210f45f
RB
7648 ref_type = get_group_alias_ptr_type (first_stmt);
7649 }
7650 else
7651 {
7652 if (grouped_load)
7653 cst_offset
7654 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7655 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
44fc7854 7656 group_size = 1;
b210f45f 7657 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7658 }
ab313a8c 7659
14ac6aa2
RB
7660 stride_base
7661 = fold_build_pointer_plus
ab313a8c 7662 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7663 size_binop (PLUS_EXPR,
ab313a8c
RB
7664 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7665 convert_to_ptrofftype (DR_INIT (first_dr))));
7666 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7667
7668 /* For a load with loop-invariant (but other than power-of-2)
7669 stride (i.e. not a grouped access) like so:
7670
7671 for (i = 0; i < n; i += stride)
7672 ... = array[i];
7673
7674 we generate a new induction variable and new accesses to
7675 form a new vector (or vectors, depending on ncopies):
7676
7677 for (j = 0; ; j += VF*stride)
7678 tmp1 = array[j];
7679 tmp2 = array[j + stride];
7680 ...
7681 vectemp = {tmp1, tmp2, ...}
7682 */
7683
ab313a8c
RB
7684 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7685 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7686
7687 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7688
b210f45f
RB
7689 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7690 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7691 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7692 loop, &incr_gsi, insert_after,
7693 &offvar, NULL);
7694 incr = gsi_stmt (incr_gsi);
310213d4 7695 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7696
b210f45f 7697 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7698
7699 prev_stmt_info = NULL;
7700 running_off = offvar;
44fc7854 7701 alias_off = build_int_cst (ref_type, 0);
4d694b27 7702 int nloads = const_nunits;
e09b4c37 7703 int lnel = 1;
7b5fc413 7704 tree ltype = TREE_TYPE (vectype);
ea60dd34 7705 tree lvectype = vectype;
b266b968 7706 auto_vec<tree> dr_chain;
2de001ee 7707 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7708 {
4d694b27 7709 if (group_size < const_nunits)
e09b4c37 7710 {
ff03930a
JJ
7711 /* First check if vec_init optab supports construction from
7712 vector elts directly. */
b397965c 7713 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7714 machine_mode vmode;
7715 if (mode_for_vector (elmode, group_size).exists (&vmode)
7716 && VECTOR_MODE_P (vmode)
414fef4e 7717 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7718 && (convert_optab_handler (vec_init_optab,
7719 TYPE_MODE (vectype), vmode)
7720 != CODE_FOR_nothing))
ea60dd34 7721 {
4d694b27 7722 nloads = const_nunits / group_size;
ea60dd34 7723 lnel = group_size;
ff03930a
JJ
7724 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7725 }
7726 else
7727 {
7728 /* Otherwise avoid emitting a constructor of vector elements
7729 by performing the loads using an integer type of the same
7730 size, constructing a vector of those and then
7731 re-interpreting it as the original vector type.
7732 This avoids a huge runtime penalty due to the general
7733 inability to perform store forwarding from smaller stores
7734 to a larger load. */
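 /* For illustration: for a V8SI group with GROUP_SIZE == 2 this
    performs four DImode loads, collects them in a V4DI constructor
    and VIEW_CONVERTs the result back to V8SI, assuming the target
    supports that vector mode and its vec_init pattern.  */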
7735 unsigned lsize
7736 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7737 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7738 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7739 /* If we can't construct such a vector fall back to
7740 element loads of the original vector type. */
4d694b27 7741 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7742 && VECTOR_MODE_P (vmode)
414fef4e 7743 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7744 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7745 != CODE_FOR_nothing))
7746 {
4d694b27 7747 nloads = lnunits;
ff03930a
JJ
7748 lnel = group_size;
7749 ltype = build_nonstandard_integer_type (lsize, 1);
7750 lvectype = build_vector_type (ltype, nloads);
7751 }
ea60dd34 7752 }
e09b4c37 7753 }
2de001ee 7754 else
e09b4c37 7755 {
ea60dd34 7756 nloads = 1;
4d694b27 7757 lnel = const_nunits;
e09b4c37 7758 ltype = vectype;
e09b4c37 7759 }
2de001ee
RS
7760 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7761 }
bb4e4747
BC
 7762 /* Load vector(1) scalar_type if it's a single-element vectype.  */
7763 else if (nloads == 1)
7764 ltype = vectype;
7765
2de001ee
RS
7766 if (slp)
7767 {
66c16fd9
RB
7768 /* For SLP permutation support we need to load the whole group,
7769 not only the number of vector stmts the permutation result
7770 fits in. */
b266b968 7771 if (slp_perm)
66c16fd9 7772 {
d9f21f6a
RS
7773 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7774 variable VF. */
7775 unsigned int const_vf = vf.to_constant ();
4d694b27 7776 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7777 dr_chain.create (ncopies);
7778 }
7779 else
7780 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7781 }
4d694b27 7782 unsigned int group_el = 0;
e09b4c37
RB
7783 unsigned HOST_WIDE_INT
7784 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7785 for (j = 0; j < ncopies; j++)
7786 {
7b5fc413 7787 if (nloads > 1)
e09b4c37
RB
7788 vec_alloc (v, nloads);
7789 for (i = 0; i < nloads; i++)
7b5fc413 7790 {
e09b4c37 7791 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7792 group_el * elsz + cst_offset);
19986382
RB
7793 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7794 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7795 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
e09b4c37
RB
7796 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7797 if (nloads > 1)
7798 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7799 gimple_assign_lhs (new_stmt));
7800
7801 group_el += lnel;
7802 if (! slp
7803 || group_el == group_size)
7b5fc413 7804 {
e09b4c37
RB
7805 tree newoff = copy_ssa_name (running_off);
7806 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7807 running_off, stride_step);
7b5fc413
RB
7808 vect_finish_stmt_generation (stmt, incr, gsi);
7809
7810 running_off = newoff;
e09b4c37 7811 group_el = 0;
7b5fc413 7812 }
7b5fc413 7813 }
e09b4c37 7814 if (nloads > 1)
7d75abc8 7815 {
ea60dd34
RB
7816 tree vec_inv = build_constructor (lvectype, v);
7817 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7818 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7819 if (lvectype != vectype)
7820 {
7821 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7822 VIEW_CONVERT_EXPR,
7823 build1 (VIEW_CONVERT_EXPR,
7824 vectype, new_temp));
7825 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7826 }
7d75abc8
MM
7827 }
7828
7b5fc413 7829 if (slp)
b266b968 7830 {
b266b968
RB
7831 if (slp_perm)
7832 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7833 else
7834 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7835 }
7d75abc8 7836 else
225ce44b
RB
7837 {
7838 if (j == 0)
7839 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7840 else
7841 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7842 prev_stmt_info = vinfo_for_stmt (new_stmt);
7843 }
7d75abc8 7844 }
b266b968 7845 if (slp_perm)
29afecdf
RB
7846 {
7847 unsigned n_perms;
7848 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7849 slp_node_instance, false, &n_perms);
7850 }
7d75abc8
MM
7851 return true;
7852 }
aec7ae7d 7853
b5ec4de7
RS
7854 if (memory_access_type == VMAT_GATHER_SCATTER
7855 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7856 grouped_load = false;
7857
0d0293ac 7858 if (grouped_load)
ebfd146a 7859 {
2c53b149
RB
7860 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7861 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7862 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7863 without permutation. */
7864 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7865 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7866 /* For BB vectorization always use the first stmt to base
7867 the data ref pointer on. */
7868 if (bb_vinfo)
7869 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7870
ebfd146a 7871 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7872 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7873 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7874 ??? But we can only do so if there is exactly one
7875 as we have no way to get at the rest. Leave the CSE
7876 opportunity alone.
7877 ??? With the group load eventually participating
7878 in multiple different permutations (having multiple
7879 slp nodes which refer to the same group) the CSE
7880 would even produce wrong code. See PR56270. */
7881 && !slp)
ebfd146a
IR
7882 {
7883 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7884 return true;
7885 }
7886 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7887 group_gap_adj = 0;
ebfd146a
IR
7888
7889 /* VEC_NUM is the number of vect stmts to be created for this group. */
7890 if (slp)
7891 {
0d0293ac 7892 grouped_load = false;
91ff1504
RB
7893 /* For SLP permutation support we need to load the whole group,
7894 not only the number of vector stmts the permutation result
7895 fits in. */
7896 if (slp_perm)
b267968e 7897 {
d9f21f6a
RS
7898 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7899 variable VF. */
7900 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7901 unsigned int const_nunits = nunits.to_constant ();
7902 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7903 group_gap_adj = vf * group_size - nunits * vec_num;
7904 }
91ff1504 7905 else
b267968e
RB
7906 {
7907 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7908 group_gap_adj
7909 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7910 }
a70d6342 7911 }
ebfd146a 7912 else
9b999e8c 7913 vec_num = group_size;
44fc7854
BE
7914
7915 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7916 }
7917 else
7918 {
7919 first_stmt = stmt;
7920 first_dr = dr;
7921 group_size = vec_num = 1;
9b999e8c 7922 group_gap_adj = 0;
44fc7854 7923 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7924 }
7925
720f5239 7926 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7927 gcc_assert (alignment_support_scheme);
70088b95
RS
7928 vec_loop_masks *loop_masks
7929 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7930 ? &LOOP_VINFO_MASKS (loop_vinfo)
7931 : NULL);
7cfb4d93
RS
7932 /* Targets with store-lane instructions must not require explicit
7933 realignment. vect_supportable_dr_alignment always returns either
7934 dr_aligned or dr_unaligned_supported for masked operations. */
7935 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7936 && !mask
70088b95 7937 && !loop_masks)
272c6793
RS
7938 || alignment_support_scheme == dr_aligned
7939 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7940
7941 /* In case the vectorization factor (VF) is bigger than the number
7942 of elements that we can fit in a vectype (nunits), we have to generate
7943 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7944 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7945 from one copy of the vector stmt to the next, in the field
ff802fa1 7946 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7947 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7948 stmts that use the defs of the current stmt. The example below
7949 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7950 need to create 4 vectorized stmts):
ebfd146a
IR
7951
7952 before vectorization:
7953 RELATED_STMT VEC_STMT
7954 S1: x = memref - -
7955 S2: z = x + 1 - -
7956
7957 step 1: vectorize stmt S1:
7958 We first create the vector stmt VS1_0, and, as usual, record a
7959 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7960 Next, we create the vector stmt VS1_1, and record a pointer to
7961 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7962 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7963 stmts and pointers:
7964 RELATED_STMT VEC_STMT
7965 VS1_0: vx0 = memref0 VS1_1 -
7966 VS1_1: vx1 = memref1 VS1_2 -
7967 VS1_2: vx2 = memref2 VS1_3 -
7968 VS1_3: vx3 = memref3 - -
7969 S1: x = load - VS1_0
7970 S2: z = x + 1 - -
7971
b8698a0f
L
7972 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7973 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
7974 stmt S2. */
7975
0d0293ac 7976 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7977
7978 S1: x2 = &base + 2
7979 S2: x0 = &base
7980 S3: x1 = &base + 1
7981 S4: x3 = &base + 3
7982
b8698a0f 7983 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7984 starting from the access of the first stmt of the chain:
7985
7986 VS1: vx0 = &base
7987 VS2: vx1 = &base + vec_size*1
7988 VS3: vx3 = &base + vec_size*2
7989 VS4: vx4 = &base + vec_size*3
7990
7991 Then permutation statements are generated:
7992
e2c83630
RH
7993 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7994 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7995 ...
7996
7997 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7998 (the order of the data-refs in the output of vect_permute_load_chain
7999 corresponds to the order of scalar stmts in the interleaving chain - see
8000 the documentation of vect_permute_load_chain()).
8001 The generation of permutation stmts and recording them in
0d0293ac 8002 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8003
b8698a0f 8004 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8005 permutation stmts above are created for every copy. The result vector
8006 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8007 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
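 /* Concrete instance of the above (illustrative values): for a group of
 two interleaved streams a and b with V4SI vectors, the loads
 vx0 = {a0,b0,a1,b1} and vx1 = {a2,b2,a3,b3} are combined with the
 even/odd masks shown above into {a0,a1,a2,a3} and {b0,b1,b2,b3}. */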
ebfd146a
IR
8008
8009 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8010 on a target that supports unaligned accesses (dr_unaligned_supported)
8011 we generate the following code:
8012 p = initial_addr;
8013 indx = 0;
8014 loop {
8015 p = p + indx * vectype_size;
8016 vec_dest = *(p);
8017 indx = indx + 1;
8018 }
8019
8020 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8021 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8022 then generate the following code, in which the data in each iteration is
8023 obtained by two vector loads, one from the previous iteration, and one
8024 from the current iteration:
8025 p1 = initial_addr;
8026 msq_init = *(floor(p1))
8027 p2 = initial_addr + VS - 1;
8028 realignment_token = call target_builtin;
8029 indx = 0;
8030 loop {
8031 p2 = p2 + indx * vectype_size
8032 lsq = *(floor(p2))
8033 vec_dest = realign_load (msq, lsq, realignment_token)
8034 indx = indx + 1;
8035 msq = lsq;
8036 } */
8037
8038 /* If the misalignment remains the same throughout the execution of the
8039 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8040 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8041 This can only occur when vectorizing memory accesses in the inner-loop
8042 nested within an outer-loop that is being vectorized. */
8043
d1e4b493 8044 if (nested_in_vect_loop
cf098191
RS
8045 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8046 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8047 {
8048 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8049 compute_in_loop = true;
8050 }
8051
8052 if ((alignment_support_scheme == dr_explicit_realign_optimized
8053 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8054 && !compute_in_loop)
ebfd146a
IR
8055 {
8056 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8057 alignment_support_scheme, NULL_TREE,
8058 &at_loop);
8059 if (alignment_support_scheme == dr_explicit_realign_optimized)
8060 {
538dd0b7 8061 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8062 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8063 size_one_node);
ebfd146a
IR
8064 }
8065 }
8066 else
8067 at_loop = loop;
8068
62da9e14 8069 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8070 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8071
ab2fc782
RS
8072 tree bump;
8073 tree vec_offset = NULL_TREE;
8074 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8075 {
8076 aggr_type = NULL_TREE;
8077 bump = NULL_TREE;
8078 }
8079 else if (memory_access_type == VMAT_GATHER_SCATTER)
8080 {
8081 aggr_type = elem_type;
8082 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8083 &bump, &vec_offset);
8084 }
272c6793 8085 else
ab2fc782
RS
8086 {
8087 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8088 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8089 else
8090 aggr_type = vectype;
8091 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8092 }
272c6793 8093
c3a8f964 8094 tree vec_mask = NULL_TREE;
ebfd146a 8095 prev_stmt_info = NULL;
4d694b27 8096 poly_uint64 group_elt = 0;
ebfd146a 8097 for (j = 0; j < ncopies; j++)
b8698a0f 8098 {
272c6793 8099 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8100 if (j == 0)
74bf76ed
JJ
8101 {
8102 bool simd_lane_access_p
8103 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8104 if (simd_lane_access_p
8105 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8106 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8107 && integer_zerop (DR_OFFSET (first_dr))
8108 && integer_zerop (DR_INIT (first_dr))
8109 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8110 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8111 && (alignment_support_scheme == dr_aligned
8112 || alignment_support_scheme == dr_unaligned_supported))
8113 {
8114 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8115 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8116 inv_p = false;
74bf76ed 8117 }
4f0a0218
RB
8118 else if (first_stmt_for_drptr
8119 && first_stmt != first_stmt_for_drptr)
8120 {
8121 dataref_ptr
8122 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8123 at_loop, offset, &dummy, gsi,
8124 &ptr_incr, simd_lane_access_p,
ab2fc782 8125 &inv_p, byte_offset, bump);
4f0a0218
RB
8126 /* Adjust the pointer by the difference to first_stmt. */
8127 data_reference_p ptrdr
8128 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8129 tree diff = fold_convert (sizetype,
8130 size_binop (MINUS_EXPR,
8131 DR_INIT (first_dr),
8132 DR_INIT (ptrdr)));
8133 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8134 stmt, diff);
8135 }
bfaa08b7
RS
8136 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8137 {
8138 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8139 &dataref_ptr, &vec_offset);
8140 inv_p = false;
8141 }
74bf76ed
JJ
8142 else
8143 dataref_ptr
8144 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8145 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8146 simd_lane_access_p, &inv_p,
ab2fc782 8147 byte_offset, bump);
c3a8f964
RS
8148 if (mask)
8149 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8150 mask_vectype);
74bf76ed 8151 }
ebfd146a 8152 else
c3a8f964
RS
8153 {
8154 if (dataref_offset)
8155 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8156 bump);
bfaa08b7 8157 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8158 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8159 vec_offset);
c3a8f964 8160 else
ab2fc782
RS
8161 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8162 stmt, bump);
c3a8f964 8163 if (mask)
929b4411 8164 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8165 }
ebfd146a 8166
0d0293ac 8167 if (grouped_load || slp_perm)
9771b263 8168 dr_chain.create (vec_num);
5ce1ee7f 8169
2de001ee 8170 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8171 {
272c6793
RS
8172 tree vec_array;
8173
8174 vec_array = create_vector_array (vectype, vec_num);
8175
7cfb4d93 8176 tree final_mask = NULL_TREE;
70088b95
RS
8177 if (loop_masks)
8178 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8179 vectype, j);
7cfb4d93
RS
8180 if (vec_mask)
8181 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8182 vec_mask, gsi);
8183
7e11fc7f 8184 gcall *call;
7cfb4d93 8185 if (final_mask)
7e11fc7f
RS
8186 {
8187 /* Emit:
8188 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8189 VEC_MASK). */
8190 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8191 tree alias_ptr = build_int_cst (ref_type, align);
8192 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8193 dataref_ptr, alias_ptr,
7cfb4d93 8194 final_mask);
7e11fc7f
RS
8195 }
8196 else
8197 {
8198 /* Emit:
8199 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8200 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8201 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8202 }
a844293d
RS
8203 gimple_call_set_lhs (call, vec_array);
8204 gimple_call_set_nothrow (call, true);
8205 new_stmt = call;
272c6793 8206 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8207
272c6793
RS
8208 /* Extract each vector into an SSA_NAME. */
8209 for (i = 0; i < vec_num; i++)
ebfd146a 8210 {
272c6793
RS
8211 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8212 vec_array, i);
9771b263 8213 dr_chain.quick_push (new_temp);
272c6793
RS
8214 }
8215
8216 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8217 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8218
8219 /* Record that VEC_ARRAY is now dead. */
8220 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
8221 }
8222 else
8223 {
8224 for (i = 0; i < vec_num; i++)
8225 {
7cfb4d93 8226 tree final_mask = NULL_TREE;
70088b95 8227 if (loop_masks
7cfb4d93 8228 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8229 final_mask = vect_get_loop_mask (gsi, loop_masks,
8230 vec_num * ncopies,
7cfb4d93
RS
8231 vectype, vec_num * j + i);
8232 if (vec_mask)
8233 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8234 vec_mask, gsi);
8235
272c6793
RS
8236 if (i > 0)
8237 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8238 stmt, bump);
272c6793
RS
8239
8240 /* 2. Create the vector-load in the loop. */
8241 switch (alignment_support_scheme)
8242 {
8243 case dr_aligned:
8244 case dr_unaligned_supported:
be1ac4ec 8245 {
644ffefd
MJ
8246 unsigned int align, misalign;
8247
bfaa08b7
RS
8248 if (memory_access_type == VMAT_GATHER_SCATTER)
8249 {
8250 tree scale = size_int (gs_info.scale);
8251 gcall *call;
70088b95 8252 if (loop_masks)
bfaa08b7
RS
8253 call = gimple_build_call_internal
8254 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8255 vec_offset, scale, final_mask);
8256 else
8257 call = gimple_build_call_internal
8258 (IFN_GATHER_LOAD, 3, dataref_ptr,
8259 vec_offset, scale);
8260 gimple_call_set_nothrow (call, true);
8261 new_stmt = call;
8262 data_ref = NULL_TREE;
8263 break;
8264 }
8265
f702e7d4 8266 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8267 if (alignment_support_scheme == dr_aligned)
8268 {
8269 gcc_assert (aligned_access_p (first_dr));
644ffefd 8270 misalign = 0;
272c6793
RS
8271 }
8272 else if (DR_MISALIGNMENT (first_dr) == -1)
8273 {
25f68d90 8274 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8275 misalign = 0;
272c6793
RS
8276 }
8277 else
c3a8f964 8278 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8279 if (dataref_offset == NULL_TREE
8280 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8281 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8282 align, misalign);
c3a8f964 8283
7cfb4d93 8284 if (final_mask)
c3a8f964
RS
8285 {
8286 align = least_bit_hwi (misalign | align);
8287 tree ptr = build_int_cst (ref_type, align);
8288 gcall *call
8289 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8290 dataref_ptr, ptr,
7cfb4d93 8291 final_mask);
c3a8f964
RS
8292 gimple_call_set_nothrow (call, true);
8293 new_stmt = call;
8294 data_ref = NULL_TREE;
8295 }
8296 else
8297 {
8298 data_ref
8299 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8300 dataref_offset
8301 ? dataref_offset
8302 : build_int_cst (ref_type, 0));
8303 if (alignment_support_scheme == dr_aligned)
8304 ;
8305 else if (DR_MISALIGNMENT (first_dr) == -1)
8306 TREE_TYPE (data_ref)
8307 = build_aligned_type (TREE_TYPE (data_ref),
8308 align * BITS_PER_UNIT);
8309 else
8310 TREE_TYPE (data_ref)
8311 = build_aligned_type (TREE_TYPE (data_ref),
8312 TYPE_ALIGN (elem_type));
8313 }
272c6793 8314 break;
be1ac4ec 8315 }
272c6793 8316 case dr_explicit_realign:
267d3070 8317 {
272c6793 8318 tree ptr, bump;
272c6793 8319
d88981fc 8320 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8321
8322 if (compute_in_loop)
8323 msq = vect_setup_realignment (first_stmt, gsi,
8324 &realignment_token,
8325 dr_explicit_realign,
8326 dataref_ptr, NULL);
8327
aed93b23
RB
8328 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8329 ptr = copy_ssa_name (dataref_ptr);
8330 else
8331 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8332 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8333 new_stmt = gimple_build_assign
8334 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8335 build_int_cst
8336 (TREE_TYPE (dataref_ptr),
f702e7d4 8337 -(HOST_WIDE_INT) align));
272c6793
RS
8338 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8339 data_ref
8340 = build2 (MEM_REF, vectype, ptr,
44fc7854 8341 build_int_cst (ref_type, 0));
19986382 8342 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8343 vec_dest = vect_create_destination_var (scalar_dest,
8344 vectype);
8345 new_stmt = gimple_build_assign (vec_dest, data_ref);
8346 new_temp = make_ssa_name (vec_dest, new_stmt);
8347 gimple_assign_set_lhs (new_stmt, new_temp);
8348 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8349 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8350 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8351 msq = new_temp;
8352
d88981fc 8353 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8354 TYPE_SIZE_UNIT (elem_type));
d88981fc 8355 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8356 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8357 new_stmt = gimple_build_assign
8358 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8359 build_int_cst
f702e7d4 8360 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8361 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8362 gimple_assign_set_lhs (new_stmt, ptr);
8363 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8364 data_ref
8365 = build2 (MEM_REF, vectype, ptr,
44fc7854 8366 build_int_cst (ref_type, 0));
272c6793 8367 break;
267d3070 8368 }
272c6793 8369 case dr_explicit_realign_optimized:
f702e7d4
RS
8370 {
8371 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8372 new_temp = copy_ssa_name (dataref_ptr);
8373 else
8374 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8375 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8376 new_stmt = gimple_build_assign
8377 (new_temp, BIT_AND_EXPR, dataref_ptr,
8378 build_int_cst (TREE_TYPE (dataref_ptr),
8379 -(HOST_WIDE_INT) align));
8380 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8381 data_ref
8382 = build2 (MEM_REF, vectype, new_temp,
8383 build_int_cst (ref_type, 0));
8384 break;
8385 }
272c6793
RS
8386 default:
8387 gcc_unreachable ();
8388 }
ebfd146a 8389 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8390 /* DATA_REF is null if we've already built the statement. */
8391 if (data_ref)
19986382
RB
8392 {
8393 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8394 new_stmt = gimple_build_assign (vec_dest, data_ref);
8395 }
ebfd146a 8396 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8397 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8398 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8399
272c6793
RS
8400 /* 3. Handle explicit realignment if necessary/supported.
8401 Create in loop:
8402 vec_dest = realign_load (msq, lsq, realignment_token) */
8403 if (alignment_support_scheme == dr_explicit_realign_optimized
8404 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8405 {
272c6793
RS
8406 lsq = gimple_assign_lhs (new_stmt);
8407 if (!realignment_token)
8408 realignment_token = dataref_ptr;
8409 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8410 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8411 msq, lsq, realignment_token);
272c6793
RS
8412 new_temp = make_ssa_name (vec_dest, new_stmt);
8413 gimple_assign_set_lhs (new_stmt, new_temp);
8414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8415
8416 if (alignment_support_scheme == dr_explicit_realign_optimized)
8417 {
8418 gcc_assert (phi);
8419 if (i == vec_num - 1 && j == ncopies - 1)
8420 add_phi_arg (phi, lsq,
8421 loop_latch_edge (containing_loop),
9e227d60 8422 UNKNOWN_LOCATION);
272c6793
RS
8423 msq = lsq;
8424 }
ebfd146a 8425 }
ebfd146a 8426
59fd17e3
RB
8427 /* 4. Handle invariant-load. */
8428 if (inv_p && !bb_vinfo)
8429 {
59fd17e3 8430 gcc_assert (!grouped_load);
d1417442
JJ
8431 /* If we have versioned for aliasing or the loop doesn't
8432 have any data dependencies that would preclude this,
8433 then we are sure this is a loop invariant load and
8434 thus we can insert it on the preheader edge. */
8435 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8436 && !nested_in_vect_loop
6b916b36 8437 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8438 {
8439 if (dump_enabled_p ())
8440 {
8441 dump_printf_loc (MSG_NOTE, vect_location,
8442 "hoisting out of the vectorized "
8443 "loop: ");
8444 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8445 }
b731b390 8446 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8447 gsi_insert_on_edge_immediate
8448 (loop_preheader_edge (loop),
8449 gimple_build_assign (tem,
8450 unshare_expr
8451 (gimple_assign_rhs1 (stmt))));
8452 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
8453 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8454 set_vinfo_for_stmt (new_stmt,
8455 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
8456 }
8457 else
8458 {
8459 gimple_stmt_iterator gsi2 = *gsi;
8460 gsi_next (&gsi2);
8461 new_temp = vect_init_vector (stmt, scalar_dest,
8462 vectype, &gsi2);
34cd48e5 8463 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8464 }
59fd17e3
RB
8465 }
8466
62da9e14 8467 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8468 {
aec7ae7d
JJ
8469 tree perm_mask = perm_mask_for_reverse (vectype);
8470 new_temp = permute_vec_elements (new_temp, new_temp,
8471 perm_mask, stmt, gsi);
ebfd146a
IR
8472 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8473 }
267d3070 8474
272c6793 8475 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8476 vect_transform_grouped_load (). */
8477 if (grouped_load || slp_perm)
9771b263 8478 dr_chain.quick_push (new_temp);
267d3070 8479
272c6793
RS
8480 /* Store vector loads in the corresponding SLP_NODE. */
8481 if (slp && !slp_perm)
9771b263 8482 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8483
8484 /* With SLP permutation we load the gaps as well; without it
8485 we need to skip the gaps after we manage to fully load
2c53b149 8486 all elements. group_gap_adj is DR_GROUP_SIZE here. */
b267968e 8487 group_elt += nunits;
d9f21f6a
RS
8488 if (maybe_ne (group_gap_adj, 0U)
8489 && !slp_perm
8490 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8491 {
d9f21f6a
RS
8492 poly_wide_int bump_val
8493 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8494 * group_gap_adj);
8e6cdc90 8495 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8496 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8497 stmt, bump);
8498 group_elt = 0;
8499 }
272c6793 8500 }
9b999e8c
RB
8501 /* Bump the vector pointer to account for a gap or for excess
8502 elements loaded for a permuted SLP load. */
d9f21f6a 8503 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8504 {
d9f21f6a
RS
8505 poly_wide_int bump_val
8506 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8507 * group_gap_adj);
8e6cdc90 8508 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8509 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8510 stmt, bump);
8511 }
ebfd146a
IR
8512 }
8513
8514 if (slp && !slp_perm)
8515 continue;
8516
8517 if (slp_perm)
8518 {
29afecdf 8519 unsigned n_perms;
01d8bf07 8520 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8521 slp_node_instance, false,
8522 &n_perms))
ebfd146a 8523 {
9771b263 8524 dr_chain.release ();
ebfd146a
IR
8525 return false;
8526 }
8527 }
8528 else
8529 {
0d0293ac 8530 if (grouped_load)
ebfd146a 8531 {
2de001ee 8532 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8533 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8534 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8535 }
8536 else
8537 {
8538 if (j == 0)
8539 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8540 else
8541 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8542 prev_stmt_info = vinfo_for_stmt (new_stmt);
8543 }
8544 }
9771b263 8545 dr_chain.release ();
ebfd146a
IR
8546 }
8547
ebfd146a
IR
8548 return true;
8549}
8550
8551/* Function vect_is_simple_cond.
b8698a0f 8552
ebfd146a
IR
8553 Input:
8554 LOOP - the loop that is being vectorized.
8555 COND - Condition that is checked for simple use.
8556
e9e1d143
RG
8557 Output:
8558 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8559 *DTS - The def types for the arguments of the comparison.
e9e1d143 8560
ebfd146a
IR
8561 Returns whether a COND can be vectorized. Checks whether
8562 condition operands are supportable using vect_is_simple_use. */
8563
87aab9b2 8564static bool
4fc5ebf1 8565vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8566 tree *comp_vectype, enum vect_def_type *dts,
8567 tree vectype)
ebfd146a
IR
8568{
8569 tree lhs, rhs;
e9e1d143 8570 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8571
a414c77f
IE
8572 /* Mask case. */
8573 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8574 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f 8575 {
894dd753 8576 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
a414c77f
IE
8577 || !*comp_vectype
8578 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8579 return false;
8580 return true;
8581 }
8582
ebfd146a
IR
8583 if (!COMPARISON_CLASS_P (cond))
8584 return false;
8585
8586 lhs = TREE_OPERAND (cond, 0);
8587 rhs = TREE_OPERAND (cond, 1);
8588
8589 if (TREE_CODE (lhs) == SSA_NAME)
8590 {
894dd753 8591 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
ebfd146a
IR
8592 return false;
8593 }
4fc5ebf1
JG
8594 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8595 || TREE_CODE (lhs) == FIXED_CST)
8596 dts[0] = vect_constant_def;
8597 else
ebfd146a
IR
8598 return false;
8599
8600 if (TREE_CODE (rhs) == SSA_NAME)
8601 {
894dd753 8602 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
ebfd146a
IR
8603 return false;
8604 }
4fc5ebf1
JG
8605 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8606 || TREE_CODE (rhs) == FIXED_CST)
8607 dts[1] = vect_constant_def;
8608 else
ebfd146a
IR
8609 return false;
8610
28b33016 8611 if (vectype1 && vectype2
928686b1
RS
8612 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8613 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8614 return false;
8615
e9e1d143 8616 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8617 /* Invariant comparison. */
4515e413 8618 if (! *comp_vectype && vectype)
8da4c8d8
RB
8619 {
8620 tree scalar_type = TREE_TYPE (lhs);
8621 /* If we can widen the comparison to match vectype do so. */
8622 if (INTEGRAL_TYPE_P (scalar_type)
8623 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8624 TYPE_SIZE (TREE_TYPE (vectype))))
8625 scalar_type = build_nonstandard_integer_type
8626 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8627 TYPE_UNSIGNED (scalar_type));
8628 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8629 }
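 /* For instance (illustrative): an invariant comparison of two chars
 while VECTYPE is a vector of 32-bit ints has its scalar type widened
 above to an integer type of the vector element's width (keeping its
 signedness), so COMP_VECTYPE ends up with the same number of elements
 as VECTYPE. */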
8630
ebfd146a
IR
8631 return true;
8632}
8633
8634/* vectorizable_condition.
8635
b8698a0f
L
8636 Check if STMT is a conditional modify expression that can be vectorized.
8637 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8638 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8639 at GSI.
8640
8641 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8642 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 8643 else clause if it is 2).
ebfd146a
IR
8644
8645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8646
4bbe8262 8647bool
355fe088
TS
8648vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8649 gimple **vec_stmt, tree reduc_def, int reduc_index,
68435eb2 8650 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8651{
8652 tree scalar_dest = NULL_TREE;
8653 tree vec_dest = NULL_TREE;
01216d27
JJ
8654 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8655 tree then_clause, else_clause;
ebfd146a 8656 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8657 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8658 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8659 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8660 tree vec_compare;
ebfd146a
IR
8661 tree new_temp;
8662 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8663 enum vect_def_type dts[4]
8664 = {vect_unknown_def_type, vect_unknown_def_type,
8665 vect_unknown_def_type, vect_unknown_def_type};
8666 int ndts = 4;
f7e531cf 8667 int ncopies;
01216d27 8668 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8669 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8670 int i, j;
8671 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8672 vec<tree> vec_oprnds0 = vNULL;
8673 vec<tree> vec_oprnds1 = vNULL;
8674 vec<tree> vec_oprnds2 = vNULL;
8675 vec<tree> vec_oprnds3 = vNULL;
74946978 8676 tree vec_cmp_type;
a414c77f 8677 bool masked = false;
b8698a0f 8678
f7e531cf
IR
8679 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8680 return false;
8681
bb6c2b68
RS
8682 vect_reduction_type reduction_type
8683 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8684 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8685 {
8686 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8687 return false;
ebfd146a 8688
af29617a
AH
8689 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8690 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8691 && reduc_def))
8692 return false;
ebfd146a 8693
af29617a
AH
8694 /* FORNOW: not yet supported. */
8695 if (STMT_VINFO_LIVE_P (stmt_info))
8696 {
8697 if (dump_enabled_p ())
8698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8699 "value used after loop.\n");
8700 return false;
8701 }
ebfd146a
IR
8702 }
8703
8704 /* Is vectorizable conditional operation? */
8705 if (!is_gimple_assign (stmt))
8706 return false;
8707
8708 code = gimple_assign_rhs_code (stmt);
8709
8710 if (code != COND_EXPR)
8711 return false;
8712
465c8c19 8713 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8714 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8715
fce57248 8716 if (slp_node)
465c8c19
JJ
8717 ncopies = 1;
8718 else
e8f142e2 8719 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8720
8721 gcc_assert (ncopies >= 1);
8722 if (reduc_index && ncopies > 1)
8723 return false; /* FORNOW */
8724
4e71066d
RG
8725 cond_expr = gimple_assign_rhs1 (stmt);
8726 then_clause = gimple_assign_rhs2 (stmt);
8727 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8728
4fc5ebf1 8729 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8730 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8731 || !comp_vectype)
ebfd146a
IR
8732 return false;
8733
894dd753 8734 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
2947d3b2 8735 return false;
894dd753 8736 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
ebfd146a 8737 return false;
2947d3b2
IE
8738
8739 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8740 return false;
8741
8742 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8743 return false;
8744
28b33016
IE
8745 masked = !COMPARISON_CLASS_P (cond_expr);
8746 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8747
74946978
MP
8748 if (vec_cmp_type == NULL_TREE)
8749 return false;
784fb9b3 8750
01216d27
JJ
8751 cond_code = TREE_CODE (cond_expr);
8752 if (!masked)
8753 {
8754 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8755 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8756 }
8757
8758 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8759 {
8760 /* Boolean values may have another representation in vectors
8761 and therefore we prefer bit operations over comparison for
8762 them (which also works for scalar masks). We store opcodes
8763 to use in bitop1 and bitop2. Statement is vectorized as
8764 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8765 depending on bitop1 and bitop2 arity. */
8766 switch (cond_code)
8767 {
8768 case GT_EXPR:
8769 bitop1 = BIT_NOT_EXPR;
8770 bitop2 = BIT_AND_EXPR;
8771 break;
8772 case GE_EXPR:
8773 bitop1 = BIT_NOT_EXPR;
8774 bitop2 = BIT_IOR_EXPR;
8775 break;
8776 case LT_EXPR:
8777 bitop1 = BIT_NOT_EXPR;
8778 bitop2 = BIT_AND_EXPR;
8779 std::swap (cond_expr0, cond_expr1);
8780 break;
8781 case LE_EXPR:
8782 bitop1 = BIT_NOT_EXPR;
8783 bitop2 = BIT_IOR_EXPR;
8784 std::swap (cond_expr0, cond_expr1);
8785 break;
8786 case NE_EXPR:
8787 bitop1 = BIT_XOR_EXPR;
8788 break;
8789 case EQ_EXPR:
8790 bitop1 = BIT_XOR_EXPR;
8791 bitop2 = BIT_NOT_EXPR;
8792 break;
8793 default:
8794 return false;
8795 }
8796 cond_code = SSA_NAME;
8797 }
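 /* Worked example on boolean masks (illustrative): for a > b the
 vectorized form is t = ~b; cmp = a & t (true only where a is set and
 b is clear); a != b becomes simply cmp = a ^ b. For a == b the XOR
 result would need a BIT_NOT, which the transform below avoids by
 swapping the then/else clauses instead. */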
8798
b8698a0f 8799 if (!vec_stmt)
ebfd146a 8800 {
01216d27
JJ
8801 if (bitop1 != NOP_EXPR)
8802 {
8803 machine_mode mode = TYPE_MODE (comp_vectype);
8804 optab optab;
8805
8806 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8807 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8808 return false;
8809
8810 if (bitop2 != NOP_EXPR)
8811 {
8812 optab = optab_for_tree_code (bitop2, comp_vectype,
8813 optab_default);
8814 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8815 return false;
8816 }
8817 }
4fc5ebf1
JG
8818 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8819 cond_code))
8820 {
68435eb2
RB
8821 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8822 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8823 cost_vec);
4fc5ebf1
JG
8824 return true;
8825 }
8826 return false;
ebfd146a
IR
8827 }
8828
f7e531cf
IR
8829 /* Transform. */
8830
8831 if (!slp_node)
8832 {
9771b263
DN
8833 vec_oprnds0.create (1);
8834 vec_oprnds1.create (1);
8835 vec_oprnds2.create (1);
8836 vec_oprnds3.create (1);
f7e531cf 8837 }
ebfd146a
IR
8838
8839 /* Handle def. */
8840 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8841 if (reduction_type != EXTRACT_LAST_REDUCTION)
8842 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8843
8844 /* Handle cond expr. */
a855b1b1
MM
8845 for (j = 0; j < ncopies; j++)
8846 {
bb6c2b68 8847 gimple *new_stmt = NULL;
a855b1b1
MM
8848 if (j == 0)
8849 {
f7e531cf
IR
8850 if (slp_node)
8851 {
00f96dc9
TS
8852 auto_vec<tree, 4> ops;
8853 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8854
a414c77f 8855 if (masked)
01216d27 8856 ops.safe_push (cond_expr);
a414c77f
IE
8857 else
8858 {
01216d27
JJ
8859 ops.safe_push (cond_expr0);
8860 ops.safe_push (cond_expr1);
a414c77f 8861 }
9771b263
DN
8862 ops.safe_push (then_clause);
8863 ops.safe_push (else_clause);
306b0c92 8864 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8865 vec_oprnds3 = vec_defs.pop ();
8866 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8867 if (!masked)
8868 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8869 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8870 }
8871 else
8872 {
a414c77f
IE
8873 if (masked)
8874 {
8875 vec_cond_lhs
8876 = vect_get_vec_def_for_operand (cond_expr, stmt,
8877 comp_vectype);
894dd753 8878 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
a414c77f
IE
8879 }
8880 else
8881 {
01216d27
JJ
8882 vec_cond_lhs
8883 = vect_get_vec_def_for_operand (cond_expr0,
8884 stmt, comp_vectype);
894dd753 8885 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
01216d27
JJ
8886
8887 vec_cond_rhs
8888 = vect_get_vec_def_for_operand (cond_expr1,
8889 stmt, comp_vectype);
894dd753 8890 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
a414c77f 8891 }
f7e531cf
IR
8892 if (reduc_index == 1)
8893 vec_then_clause = reduc_def;
8894 else
8895 {
8896 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241 8897 stmt);
894dd753 8898 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
f7e531cf
IR
8899 }
8900 if (reduc_index == 2)
8901 vec_else_clause = reduc_def;
8902 else
8903 {
8904 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241 8905 stmt);
894dd753 8906 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
f7e531cf 8907 }
a855b1b1
MM
8908 }
8909 }
8910 else
8911 {
a414c77f
IE
8912 vec_cond_lhs
8913 = vect_get_vec_def_for_stmt_copy (dts[0],
8914 vec_oprnds0.pop ());
8915 if (!masked)
8916 vec_cond_rhs
8917 = vect_get_vec_def_for_stmt_copy (dts[1],
8918 vec_oprnds1.pop ());
8919
a855b1b1 8920 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8921 vec_oprnds2.pop ());
a855b1b1 8922 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8923 vec_oprnds3.pop ());
f7e531cf
IR
8924 }
8925
8926 if (!slp_node)
8927 {
9771b263 8928 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8929 if (!masked)
8930 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8931 vec_oprnds2.quick_push (vec_then_clause);
8932 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8933 }
8934
9dc3f7de 8935 /* Arguments are ready. Create the new vector stmt. */
9771b263 8936 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8937 {
9771b263
DN
8938 vec_then_clause = vec_oprnds2[i];
8939 vec_else_clause = vec_oprnds3[i];
a855b1b1 8940
a414c77f
IE
8941 if (masked)
8942 vec_compare = vec_cond_lhs;
8943 else
8944 {
8945 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8946 if (bitop1 == NOP_EXPR)
8947 vec_compare = build2 (cond_code, vec_cmp_type,
8948 vec_cond_lhs, vec_cond_rhs);
8949 else
8950 {
8951 new_temp = make_ssa_name (vec_cmp_type);
8952 if (bitop1 == BIT_NOT_EXPR)
8953 new_stmt = gimple_build_assign (new_temp, bitop1,
8954 vec_cond_rhs);
8955 else
8956 new_stmt
8957 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8958 vec_cond_rhs);
8959 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8960 if (bitop2 == NOP_EXPR)
8961 vec_compare = new_temp;
8962 else if (bitop2 == BIT_NOT_EXPR)
8963 {
8964 /* Instead of doing ~x ? y : z do x ? z : y. */
8965 vec_compare = new_temp;
8966 std::swap (vec_then_clause, vec_else_clause);
8967 }
8968 else
8969 {
8970 vec_compare = make_ssa_name (vec_cmp_type);
8971 new_stmt
8972 = gimple_build_assign (vec_compare, bitop2,
8973 vec_cond_lhs, new_temp);
8974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8975 }
8976 }
a414c77f 8977 }
bb6c2b68
RS
8978 if (reduction_type == EXTRACT_LAST_REDUCTION)
8979 {
8980 if (!is_gimple_val (vec_compare))
8981 {
8982 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8983 new_stmt = gimple_build_assign (vec_compare_name,
8984 vec_compare);
8985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8986 vec_compare = vec_compare_name;
8987 }
8988 gcc_assert (reduc_index == 2);
8989 new_stmt = gimple_build_call_internal
8990 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8991 vec_then_clause);
8992 gimple_call_set_lhs (new_stmt, scalar_dest);
8993 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8994 if (stmt == gsi_stmt (*gsi))
8995 vect_finish_replace_stmt (stmt, new_stmt);
8996 else
8997 {
8998 /* In this case we're moving the definition to later in the
8999 block. That doesn't matter because the only uses of the
9000 lhs are in phi statements. */
9001 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9002 gsi_remove (&old_gsi, true);
9003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9004 }
9005 }
9006 else
9007 {
9008 new_temp = make_ssa_name (vec_dest);
9009 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9010 vec_compare, vec_then_clause,
9011 vec_else_clause);
9012 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9013 }
f7e531cf 9014 if (slp_node)
9771b263 9015 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
9016 }
9017
9018 if (slp_node)
9019 continue;
9020
9021 if (j == 0)
9022 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9023 else
9024 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9025
9026 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 9027 }
b8698a0f 9028
9771b263
DN
9029 vec_oprnds0.release ();
9030 vec_oprnds1.release ();
9031 vec_oprnds2.release ();
9032 vec_oprnds3.release ();
f7e531cf 9033
ebfd146a
IR
9034 return true;
9035}
9036
42fd8198
IE
9037/* vectorizable_comparison.
9038
9039 Check if STMT is a comparison expression that can be vectorized.
9040 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9041 comparison, put it in VEC_STMT, and insert it at GSI.
9042
9043 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9044
fce57248 9045static bool
42fd8198
IE
9046vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9047 gimple **vec_stmt, tree reduc_def,
68435eb2 9048 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9049{
9050 tree lhs, rhs1, rhs2;
9051 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9052 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9053 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9054 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9055 tree new_temp;
9056 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9057 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9058 int ndts = 2;
928686b1 9059 poly_uint64 nunits;
42fd8198 9060 int ncopies;
49e76ff1 9061 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9062 stmt_vec_info prev_stmt_info = NULL;
9063 int i, j;
9064 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9065 vec<tree> vec_oprnds0 = vNULL;
9066 vec<tree> vec_oprnds1 = vNULL;
42fd8198
IE
9067 tree mask_type;
9068 tree mask;
9069
c245362b
IE
9070 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9071 return false;
9072
30480bcd 9073 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9074 return false;
9075
9076 mask_type = vectype;
9077 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9078
fce57248 9079 if (slp_node)
42fd8198
IE
9080 ncopies = 1;
9081 else
e8f142e2 9082 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9083
9084 gcc_assert (ncopies >= 1);
42fd8198
IE
9085 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9086 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9087 && reduc_def))
9088 return false;
9089
9090 if (STMT_VINFO_LIVE_P (stmt_info))
9091 {
9092 if (dump_enabled_p ())
9093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9094 "value used after loop.\n");
9095 return false;
9096 }
9097
9098 if (!is_gimple_assign (stmt))
9099 return false;
9100
9101 code = gimple_assign_rhs_code (stmt);
9102
9103 if (TREE_CODE_CLASS (code) != tcc_comparison)
9104 return false;
9105
9106 rhs1 = gimple_assign_rhs1 (stmt);
9107 rhs2 = gimple_assign_rhs2 (stmt);
9108
894dd753 9109 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
42fd8198
IE
9110 return false;
9111
894dd753 9112 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
42fd8198
IE
9113 return false;
9114
9115 if (vectype1 && vectype2
928686b1
RS
9116 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9117 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9118 return false;
9119
9120 vectype = vectype1 ? vectype1 : vectype2;
9121
9122 /* Invariant comparison. */
9123 if (!vectype)
9124 {
69a9a66f 9125 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9126 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9127 return false;
9128 }
928686b1 9129 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9130 return false;
9131
49e76ff1
IE
9132 /* Can't compare mask and non-mask types. */
9133 if (vectype1 && vectype2
9134 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9135 return false;
9136
9137 /* Boolean values may have another representation in vectors
9138 and therefore we prefer bit operations over comparison for
9139 them (which also works for scalar masks). We store opcodes
9140 to use in bitop1 and bitop2. Statement is vectorized as
9141 BITOP2 (rhs1 BITOP1 rhs2) or
9142 rhs1 BITOP2 (BITOP1 rhs2)
9143 depending on bitop1 and bitop2 arity. */
9144 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9145 {
9146 if (code == GT_EXPR)
9147 {
9148 bitop1 = BIT_NOT_EXPR;
9149 bitop2 = BIT_AND_EXPR;
9150 }
9151 else if (code == GE_EXPR)
9152 {
9153 bitop1 = BIT_NOT_EXPR;
9154 bitop2 = BIT_IOR_EXPR;
9155 }
9156 else if (code == LT_EXPR)
9157 {
9158 bitop1 = BIT_NOT_EXPR;
9159 bitop2 = BIT_AND_EXPR;
9160 std::swap (rhs1, rhs2);
264d951a 9161 std::swap (dts[0], dts[1]);
49e76ff1
IE
9162 }
9163 else if (code == LE_EXPR)
9164 {
9165 bitop1 = BIT_NOT_EXPR;
9166 bitop2 = BIT_IOR_EXPR;
9167 std::swap (rhs1, rhs2);
264d951a 9168 std::swap (dts[0], dts[1]);
49e76ff1
IE
9169 }
9170 else
9171 {
9172 bitop1 = BIT_XOR_EXPR;
9173 if (code == EQ_EXPR)
9174 bitop2 = BIT_NOT_EXPR;
9175 }
9176 }
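 /* E.g. on boolean masks (illustrative): a < b is rewritten by the
 swaps above as b > a, i.e. t = ~a; res = b & t, while a == b is
 emitted as t = a ^ b; res = ~t. */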
9177
42fd8198
IE
9178 if (!vec_stmt)
9179 {
49e76ff1 9180 if (bitop1 == NOP_EXPR)
68435eb2
RB
9181 {
9182 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9183 return false;
9184 }
49e76ff1
IE
9185 else
9186 {
9187 machine_mode mode = TYPE_MODE (vectype);
9188 optab optab;
9189
9190 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9191 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9192 return false;
9193
9194 if (bitop2 != NOP_EXPR)
9195 {
9196 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9197 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9198 return false;
9199 }
49e76ff1 9200 }
68435eb2
RB
9201
9202 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9203 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9204 dts, ndts, slp_node, cost_vec);
9205 return true;
42fd8198
IE
9206 }
9207
9208 /* Transform. */
9209 if (!slp_node)
9210 {
9211 vec_oprnds0.create (1);
9212 vec_oprnds1.create (1);
9213 }
9214
9215 /* Handle def. */
9216 lhs = gimple_assign_lhs (stmt);
9217 mask = vect_create_destination_var (lhs, mask_type);
9218
9219 /* Handle cmp expr. */
9220 for (j = 0; j < ncopies; j++)
9221 {
9222 gassign *new_stmt = NULL;
9223 if (j == 0)
9224 {
9225 if (slp_node)
9226 {
9227 auto_vec<tree, 2> ops;
9228 auto_vec<vec<tree>, 2> vec_defs;
9229
9230 ops.safe_push (rhs1);
9231 ops.safe_push (rhs2);
306b0c92 9232 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9233 vec_oprnds1 = vec_defs.pop ();
9234 vec_oprnds0 = vec_defs.pop ();
9235 }
9236 else
9237 {
e4af0bc4
IE
9238 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9239 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9240 }
9241 }
9242 else
9243 {
9244 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9245 vec_oprnds0.pop ());
9246 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9247 vec_oprnds1.pop ());
9248 }
9249
9250 if (!slp_node)
9251 {
9252 vec_oprnds0.quick_push (vec_rhs1);
9253 vec_oprnds1.quick_push (vec_rhs2);
9254 }
9255
9256 /* Arguments are ready. Create the new vector stmt. */
9257 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9258 {
9259 vec_rhs2 = vec_oprnds1[i];
9260
9261 new_temp = make_ssa_name (mask);
49e76ff1
IE
9262 if (bitop1 == NOP_EXPR)
9263 {
9264 new_stmt = gimple_build_assign (new_temp, code,
9265 vec_rhs1, vec_rhs2);
9266 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9267 }
9268 else
9269 {
9270 if (bitop1 == BIT_NOT_EXPR)
9271 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9272 else
9273 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9274 vec_rhs2);
9275 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9276 if (bitop2 != NOP_EXPR)
9277 {
9278 tree res = make_ssa_name (mask);
9279 if (bitop2 == BIT_NOT_EXPR)
9280 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9281 else
9282 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9283 new_temp);
9284 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9285 }
9286 }
42fd8198
IE
9287 if (slp_node)
9288 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9289 }
9290
9291 if (slp_node)
9292 continue;
9293
9294 if (j == 0)
9295 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9296 else
9297 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9298
9299 prev_stmt_info = vinfo_for_stmt (new_stmt);
9300 }
9301
9302 vec_oprnds0.release ();
9303 vec_oprnds1.release ();
9304
9305 return true;
9306}
ebfd146a 9307
68a0f2ff
RS
9308/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9309 can handle all live statements in the node. Otherwise return true
9310 if STMT is not live or if vectorizable_live_operation can handle it.
9311 GSI and VEC_STMT are as for vectorizable_live_operation. */
9312
9313static bool
9314can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
9315 slp_tree slp_node, gimple **vec_stmt,
9316 stmt_vector_for_cost *cost_vec)
68a0f2ff
RS
9317{
9318 if (slp_node)
9319 {
9320 gimple *slp_stmt;
9321 unsigned int i;
9322 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9323 {
9324 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9325 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9326 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
68435eb2 9327 vec_stmt, cost_vec))
68a0f2ff
RS
9328 return false;
9329 }
9330 }
9331 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
68435eb2
RB
9332 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9333 cost_vec))
68a0f2ff
RS
9334 return false;
9335
9336 return true;
9337}
9338
8644a673 9339/* Make sure the statement is vectorizable. */
ebfd146a
IR
9340
9341bool
891ad31c 9342vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9343 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9344{
8644a673 9345 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 9346 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9347 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9348 bool ok;
355fe088 9349 gimple *pattern_stmt;
363477c0 9350 gimple_seq pattern_def_seq;
ebfd146a 9351
73fbfcad 9352 if (dump_enabled_p ())
ebfd146a 9353 {
78c60e3d
SS
9354 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9355 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9356 }
ebfd146a 9357
1825a1f3 9358 if (gimple_has_volatile_ops (stmt))
b8698a0f 9359 {
73fbfcad 9360 if (dump_enabled_p ())
78c60e3d 9361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9362 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9363
9364 return false;
9365 }
b8698a0f 9366
d54a098e
RS
9367 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9368 && node == NULL
9369 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9370 {
9371 gimple_stmt_iterator si;
9372
9373 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9374 {
9375 gimple *pattern_def_stmt = gsi_stmt (si);
9376 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9377 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9378 {
9379 /* Analyze def stmt of STMT if it's a pattern stmt. */
9380 if (dump_enabled_p ())
9381 {
9382 dump_printf_loc (MSG_NOTE, vect_location,
9383 "==> examining pattern def statement: ");
9384 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9385 }
9386
9387 if (!vect_analyze_stmt (pattern_def_stmt,
9388 need_to_vectorize, node, node_instance,
9389 cost_vec))
9390 return false;
9391 }
9392 }
9393 }
9394
b8698a0f 9395 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9396 to include:
9397 - the COND_EXPR which is the loop exit condition
9398 - any LABEL_EXPRs in the loop
b8698a0f 9399 - computations that are used only for array indexing or loop control.
8644a673 9400 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9401 instance, therefore, all the statements are relevant.
ebfd146a 9402
d092494c 9403 Pattern statement needs to be analyzed instead of the original statement
83197f37 9404 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9405 statements. In basic blocks we are called from some SLP instance
9406 traversal; don't analyze pattern stmts instead, since the pattern
9407 stmts will already be part of an SLP instance. */
83197f37
IR
9408
9409 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9410 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9411 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9412 {
9d5e7640 9413 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9414 && pattern_stmt
9d5e7640
IR
9415 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9416 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9417 {
83197f37 9418 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9419 stmt = pattern_stmt;
9420 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9421 if (dump_enabled_p ())
9d5e7640 9422 {
78c60e3d
SS
9423 dump_printf_loc (MSG_NOTE, vect_location,
9424 "==> examining pattern statement: ");
9425 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9426 }
9427 }
9428 else
9429 {
73fbfcad 9430 if (dump_enabled_p ())
e645e942 9431 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9432
9d5e7640
IR
9433 return true;
9434 }
8644a673 9435 }
83197f37 9436 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9437 && node == NULL
83197f37
IR
9438 && pattern_stmt
9439 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9440 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9441 {
9442 /* Analyze PATTERN_STMT too. */
73fbfcad 9443 if (dump_enabled_p ())
83197f37 9444 {
78c60e3d
SS
9445 dump_printf_loc (MSG_NOTE, vect_location,
9446 "==> examining pattern statement: ");
9447 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9448 }
9449
891ad31c 9450 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
68435eb2 9451 node_instance, cost_vec))
83197f37
IR
9452 return false;
9453 }
ebfd146a 9454
8644a673
IR
9455 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9456 {
9457 case vect_internal_def:
9458 break;
ebfd146a 9459
8644a673 9460 case vect_reduction_def:
7c5222ff 9461 case vect_nested_cycle:
14a61437
RB
9462 gcc_assert (!bb_vinfo
9463 && (relevance == vect_used_in_outer
9464 || relevance == vect_used_in_outer_by_reduction
9465 || relevance == vect_used_by_reduction
b28ead45
AH
9466 || relevance == vect_unused_in_scope
9467 || relevance == vect_used_only_live));
8644a673
IR
9468 break;
9469
9470 case vect_induction_def:
e7baeb39
RB
9471 gcc_assert (!bb_vinfo);
9472 break;
9473
8644a673
IR
9474 case vect_constant_def:
9475 case vect_external_def:
9476 case vect_unknown_def_type:
9477 default:
9478 gcc_unreachable ();
9479 }
ebfd146a 9480
8644a673 9481 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9482 {
8644a673 9483 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9484 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9485 || (is_gimple_call (stmt)
9486 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9487 *need_to_vectorize = true;
ebfd146a
IR
9488 }
9489
b1af7da6
RB
9490 if (PURE_SLP_STMT (stmt_info) && !node)
9491 {
9492 dump_printf_loc (MSG_NOTE, vect_location,
9493 "handled only by SLP analysis\n");
9494 return true;
9495 }
9496
9497 ok = true;
9498 if (!bb_vinfo
9499 && (STMT_VINFO_RELEVANT_P (stmt_info)
9500 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
68435eb2
RB
9501 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9502 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9503 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9504 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9505 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9506 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9507 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9508 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9509 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9510 cost_vec)
9511 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9512 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9513 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
b1af7da6
RB
9514 else
9515 {
9516 if (bb_vinfo)
68435eb2
RB
9517 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9518 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9519 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9520 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9521 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9522 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9523 cost_vec)
9524 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9525 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9526 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9527 cost_vec)
9528 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9529 cost_vec));
b1af7da6 9530 }
8644a673
IR
9531
9532 if (!ok)
ebfd146a 9533 {
73fbfcad 9534 if (dump_enabled_p ())
8644a673 9535 {
78c60e3d
SS
9536 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9537 "not vectorized: relevant stmt not ");
9538 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9539 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9540 }
b8698a0f 9541
ebfd146a
IR
9542 return false;
9543 }
9544
8644a673
IR
 9545  /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
9546 need extra handling, except for vectorizable reductions. */
68435eb2
RB
9547 if (!bb_vinfo
9548 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9549 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9550 {
73fbfcad 9551 if (dump_enabled_p ())
8644a673 9552 {
78c60e3d 9553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9554 "not vectorized: live stmt not supported: ");
78c60e3d 9555 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9556 }
b8698a0f 9557
8644a673 9558 return false;
ebfd146a
IR
9559 }
9560
ebfd146a
IR
9561 return true;
9562}
9563
9564
9565/* Function vect_transform_stmt.
9566
9567 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9568
9569bool
355fe088 9570vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9571 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9572 slp_instance slp_node_instance)
9573{
9574 bool is_store = false;
355fe088 9575 gimple *vec_stmt = NULL;
ebfd146a 9576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 9577 bool done;
ebfd146a 9578
fce57248 9579 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9580 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9581
e57d9a82
RB
9582 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9583 && nested_in_vect_loop_p
9584 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9585 stmt));
9586
ebfd146a
IR
9587 switch (STMT_VINFO_TYPE (stmt_info))
9588 {
9589 case type_demotion_vec_info_type:
ebfd146a 9590 case type_promotion_vec_info_type:
ebfd146a 9591 case type_conversion_vec_info_type:
68435eb2 9592 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9593 gcc_assert (done);
9594 break;
9595
9596 case induc_vec_info_type:
68435eb2 9597 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9598 gcc_assert (done);
9599 break;
9600
9dc3f7de 9601 case shift_vec_info_type:
68435eb2 9602 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9dc3f7de
IR
9603 gcc_assert (done);
9604 break;
9605
ebfd146a 9606 case op_vec_info_type:
68435eb2 9607 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9608 gcc_assert (done);
9609 break;
9610
9611 case assignment_vec_info_type:
68435eb2 9612 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9613 gcc_assert (done);
9614 break;
9615
9616 case load_vec_info_type:
b8698a0f 9617 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9618 slp_node_instance, NULL);
ebfd146a
IR
9619 gcc_assert (done);
9620 break;
9621
9622 case store_vec_info_type:
68435eb2 9623 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9624 gcc_assert (done);
0d0293ac 9625 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9626 {
9627 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9628 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
 9629		 one are skipped, and their vec_stmt_info shouldn't be freed
9630 meanwhile. */
0d0293ac 9631 *grouped_store = true;
f307441a 9632 stmt_vec_info group_info
2c53b149
RB
9633 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9634 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
ebfd146a 9635 is_store = true;
f307441a 9636 }
ebfd146a
IR
9637 else
9638 is_store = true;
9639 break;
9640
9641 case condition_vec_info_type:
68435eb2 9642 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
ebfd146a
IR
9643 gcc_assert (done);
9644 break;
9645
42fd8198 9646 case comparison_vec_info_type:
68435eb2 9647 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
42fd8198
IE
9648 gcc_assert (done);
9649 break;
9650
ebfd146a 9651 case call_vec_info_type:
68435eb2 9652 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9653 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9654 break;
9655
0136f8f0 9656 case call_simd_clone_vec_info_type:
68435eb2 9657 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
0136f8f0
AH
9658 stmt = gsi_stmt (*gsi);
9659 break;
9660
ebfd146a 9661 case reduc_vec_info_type:
891ad31c 9662 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9663 slp_node_instance, NULL);
ebfd146a
IR
9664 gcc_assert (done);
9665 break;
9666
9667 default:
9668 if (!STMT_VINFO_LIVE_P (stmt_info))
9669 {
73fbfcad 9670 if (dump_enabled_p ())
78c60e3d 9671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9672 "stmt not supported.\n");
ebfd146a
IR
9673 gcc_unreachable ();
9674 }
9675 }
9676
225ce44b
RB
9677 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9678 This would break hybrid SLP vectorization. */
9679 if (slp_node)
d90f8440
RB
9680 gcc_assert (!vec_stmt
9681 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9682
ebfd146a
IR
9683 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9684 is being vectorized, but outside the immediately enclosing loop. */
9685 if (vec_stmt
e57d9a82 9686 && nested_p
ebfd146a
IR
9687 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9688 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9689 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9690 vect_used_in_outer_by_reduction))
ebfd146a 9691 {
a70d6342
IR
9692 struct loop *innerloop = LOOP_VINFO_LOOP (
9693 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9694 imm_use_iterator imm_iter;
9695 use_operand_p use_p;
9696 tree scalar_dest;
355fe088 9697 gimple *exit_phi;
ebfd146a 9698
73fbfcad 9699 if (dump_enabled_p ())
78c60e3d 9700 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9701 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9702
 9703	 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9704 (to be used when vectorizing outer-loop stmts that use the DEF of
9705 STMT). */
9706 if (gimple_code (stmt) == GIMPLE_PHI)
9707 scalar_dest = PHI_RESULT (stmt);
9708 else
9709 scalar_dest = gimple_assign_lhs (stmt);
9710
9711 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9712 {
9713 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9714 {
9715 exit_phi = USE_STMT (use_p);
9716 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9717 }
9718 }
9719 }
9720
9721 /* Handle stmts whose DEF is used outside the loop-nest that is
9722 being vectorized. */
68a0f2ff 9723 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9724 {
68435eb2 9725 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
ebfd146a
IR
9726 gcc_assert (done);
9727 }
9728
9729 if (vec_stmt)
83197f37 9730 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9731
b8698a0f 9732 return is_store;
ebfd146a
IR
9733}
9734
9735
b8698a0f 9736/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9737 stmt_vec_info. */
9738
9739void
355fe088 9740vect_remove_stores (gimple *first_stmt)
ebfd146a 9741{
355fe088
TS
9742 gimple *next = first_stmt;
9743 gimple *tmp;
ebfd146a
IR
9744 gimple_stmt_iterator next_si;
9745
9746 while (next)
9747 {
78048b1c
JJ
9748 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9749
2c53b149 9750 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
78048b1c
JJ
9751 if (is_pattern_stmt_p (stmt_info))
9752 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9753 /* Free the attached stmt_vec_info and remove the stmt. */
9754 next_si = gsi_for_stmt (next);
3d3f2249 9755 unlink_stmt_vdef (next);
ebfd146a 9756 gsi_remove (&next_si, true);
3d3f2249 9757 release_defs (next);
ebfd146a
IR
9758 free_stmt_vec_info (next);
9759 next = tmp;
9760 }
9761}
9762
9763
9764/* Function new_stmt_vec_info.
9765
9766 Create and initialize a new stmt_vec_info struct for STMT. */
9767
9768stmt_vec_info
310213d4 9769new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9770{
9771 stmt_vec_info res;
9772 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9773
9774 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9775 STMT_VINFO_STMT (res) = stmt;
310213d4 9776 res->vinfo = vinfo;
8644a673 9777 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9778 STMT_VINFO_LIVE_P (res) = false;
9779 STMT_VINFO_VECTYPE (res) = NULL;
9780 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9781 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9782 STMT_VINFO_IN_PATTERN_P (res) = false;
9783 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9784 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9785 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9786 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9787 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9788
ebfd146a
IR
9789 if (gimple_code (stmt) == GIMPLE_PHI
9790 && is_loop_header_bb_p (gimple_bb (stmt)))
9791 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9792 else
8644a673
IR
9793 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9794
9771b263 9795 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9796 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9797 STMT_VINFO_NUM_SLP_USES (res) = 0;
9798
2c53b149
RB
9799 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9800 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9801 res->size = 0; /* GROUP_SIZE */
9802 res->store_count = 0; /* GROUP_STORE_COUNT */
9803 res->gap = 0; /* GROUP_GAP */
9804 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
ebfd146a 9805
ca823c85
RB
9806 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9807 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9808
ebfd146a
IR
9809 return res;
9810}
9811
9812
f8c0baaf 9813/* Set the current stmt_vec_info vector to V. */
ebfd146a
IR
9814
9815void
f8c0baaf 9816set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
ebfd146a 9817{
f8c0baaf 9818 stmt_vec_info_vec = v;
ebfd146a
IR
9819}
9820
f8c0baaf 9821/* Free the stmt_vec_info entries in V and release V. */
ebfd146a
IR
9822
9823void
f8c0baaf 9824free_stmt_vec_infos (vec<stmt_vec_info> *v)
ebfd146a 9825{
93675444 9826 unsigned int i;
3161455c 9827 stmt_vec_info info;
f8c0baaf 9828 FOR_EACH_VEC_ELT (*v, i, info)
93675444 9829 if (info != NULL)
3161455c 9830 free_stmt_vec_info (STMT_VINFO_STMT (info));
f8c0baaf
RB
9831 if (v == stmt_vec_info_vec)
9832 stmt_vec_info_vec = NULL;
9833 v->release ();
ebfd146a
IR
9834}
9835
9836
9837/* Free stmt vectorization related info. */
9838
9839void
355fe088 9840free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9841{
9842 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9843
9844 if (!stmt_info)
9845 return;
9846
78048b1c
JJ
9847 /* Check if this statement has a related "pattern stmt"
9848 (introduced by the vectorizer during the pattern recognition
9849 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9850 too. */
9851 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9852 {
e3947d80
RS
9853 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9854 for (gimple_stmt_iterator si = gsi_start (seq);
9855 !gsi_end_p (si); gsi_next (&si))
9856 {
9857 gimple *seq_stmt = gsi_stmt (si);
9858 gimple_set_bb (seq_stmt, NULL);
9859 tree lhs = gimple_get_lhs (seq_stmt);
9860 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9861 release_ssa_name (lhs);
9862 free_stmt_vec_info (seq_stmt);
9863 }
78048b1c
JJ
9864 stmt_vec_info patt_info
9865 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9866 if (patt_info)
9867 {
355fe088 9868 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9869 gimple_set_bb (patt_stmt, NULL);
9870 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9871 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9872 release_ssa_name (lhs);
f0281fde 9873 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9874 }
9875 }
9876
9771b263 9877 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9878 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9879 set_vinfo_for_stmt (stmt, NULL);
9880 free (stmt_info);
9881}
9882
9883
bb67d9c7 9884/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9885
bb67d9c7 9886 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9887 by the target. */
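/* For example (illustrative only, target-dependent): on a target with
   128-bit vectors, SCALAR_TYPE int (32 bits) and SIZE == 16 would give a
   four-element integer vector type such as V4SI; with SIZE == 0 the mode
   returned by targetm.vectorize.preferred_simd_mode is used instead.  */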
9888
c803b2a9 9889tree
86e36728 9890get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9891{
c7d97b28 9892 tree orig_scalar_type = scalar_type;
3bd8f481 9893 scalar_mode inner_mode;
ef4bddc2 9894 machine_mode simd_mode;
86e36728 9895 poly_uint64 nunits;
ebfd146a
IR
9896 tree vectype;
9897
3bd8f481
RS
9898 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9899 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9900 return NULL_TREE;
9901
3bd8f481 9902 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9903
7b7b1813
RG
9904 /* For vector types of elements whose mode precision doesn't
 9905     match their type's precision we use an element type of mode
9906 precision. The vectorization routines will have to make sure
48f2e373
RB
9907 they support the proper result truncation/extension.
9908 We also make sure to build vector types with INTEGER_TYPE
9909 component type only. */
6d7971b8 9910 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9911 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9912 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9913 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9914 TYPE_UNSIGNED (scalar_type));
6d7971b8 9915
ccbf5bb4
RG
9916 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9917 When the component mode passes the above test simply use a type
9918 corresponding to that mode. The theory is that any use that
9919 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9920 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9921 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9922 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9923
9924 /* We can't build a vector type of elements with alignment bigger than
9925 their size. */
dfc2e2ac 9926 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9927 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9928 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9929
dfc2e2ac
RB
 9930  /* If we fell back to using the mode, fail if there was
9931 no scalar type for it. */
9932 if (scalar_type == NULL_TREE)
9933 return NULL_TREE;
9934
bb67d9c7
RG
 9935  /* If no size was supplied, use the mode the target prefers.  Otherwise
 9936     look up a vector mode of the specified size.  */
86e36728 9937 if (known_eq (size, 0U))
bb67d9c7 9938 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9939 else if (!multiple_p (size, nbytes, &nunits)
9940 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9941 return NULL_TREE;
4c8fd8ac 9942 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9943 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9944 return NULL_TREE;
ebfd146a
IR
9945
9946 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9947
9948 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9949 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9950 return NULL_TREE;
ebfd146a 9951
c7d97b28
RB
9952 /* Re-attach the address-space qualifier if we canonicalized the scalar
9953 type. */
9954 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9955 return build_qualified_type
9956 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9957
ebfd146a
IR
9958 return vectype;
9959}
9960
86e36728 9961poly_uint64 current_vector_size;
bb67d9c7
RG
9962
9963/* Function get_vectype_for_scalar_type.
9964
9965 Returns the vector type corresponding to SCALAR_TYPE as supported
9966 by the target. */
9967
9968tree
9969get_vectype_for_scalar_type (tree scalar_type)
9970{
9971 tree vectype;
9972 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9973 current_vector_size);
9974 if (vectype
86e36728 9975 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
9976 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9977 return vectype;
9978}
9979
42fd8198
IE
9980/* Function get_mask_type_for_scalar_type.
9981
9982 Returns the mask type corresponding to a result of comparison
 9983    of vectors of the specified SCALAR_TYPE, as supported by the target.  */
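/* For example (illustrative only): with a current vector size of 16 bytes,
   SCALAR_TYPE int maps to a four-element vector, so the returned mask type
   is the corresponding four-element truth vector (an integer vector on
   targets without dedicated mask registers, a genuine predicate type on
   targets such as AVX-512 or SVE).  */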
9984
9985tree
9986get_mask_type_for_scalar_type (tree scalar_type)
9987{
9988 tree vectype = get_vectype_for_scalar_type (scalar_type);
9989
9990 if (!vectype)
9991 return NULL;
9992
9993 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9994 current_vector_size);
9995}
9996
b690cc0f
RG
9997/* Function get_same_sized_vectype
9998
 9999    Returns a vector type corresponding to SCALAR_TYPE with the same size
 10000   as VECTOR_TYPE, if supported by the target.  */
10001
10002tree
bb67d9c7 10003get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10004{
2568d8a1 10005 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
10006 return build_same_sized_truth_vector_type (vector_type);
10007
bb67d9c7
RG
10008 return get_vectype_for_scalar_type_and_size
10009 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
10010}
10011
ebfd146a
IR
10012/* Function vect_is_simple_use.
10013
10014 Input:
81c40241
RB
10015 VINFO - the vect info of the loop or basic block that is being vectorized.
10016 OPERAND - operand in the loop or bb.
10017 Output:
894dd753 10018 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME.
81c40241 10019 DT - the type of definition
ebfd146a
IR
10020
10021 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10022 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10023 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 10024 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
10025 is the case in reduction/induction computations).
10026 For basic blocks, supportable operands are constants and bb invariants.
10027 For now, operands defined outside the basic block are not supported. */
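/* For example (illustrative only): in a loop statement a_2 = b_1 + 5,
   the constant 5 is classified as vect_constant_def, an SSA name defined
   outside the region being vectorized (such as an invariant b_1) as
   vect_external_def, and an SSA name defined by a statement inside the
   region according to the STMT_VINFO_DEF_TYPE of its defining statement
   (typically vect_internal_def).  */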
ebfd146a
IR
10028
10029bool
894dd753
RS
10030vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10031 gimple **def_stmt_out)
b8698a0f 10032{
894dd753
RS
10033 if (def_stmt_out)
10034 *def_stmt_out = NULL;
3fc356dc 10035 *dt = vect_unknown_def_type;
b8698a0f 10036
73fbfcad 10037 if (dump_enabled_p ())
ebfd146a 10038 {
78c60e3d
SS
10039 dump_printf_loc (MSG_NOTE, vect_location,
10040 "vect_is_simple_use: operand ");
10041 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 10042 dump_printf (MSG_NOTE, "\n");
ebfd146a 10043 }
b8698a0f 10044
b758f602 10045 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
10046 {
10047 *dt = vect_constant_def;
10048 return true;
10049 }
b8698a0f 10050
ebfd146a
IR
10051 if (is_gimple_min_invariant (operand))
10052 {
8644a673 10053 *dt = vect_external_def;
ebfd146a
IR
10054 return true;
10055 }
10056
ebfd146a
IR
10057 if (TREE_CODE (operand) != SSA_NAME)
10058 {
73fbfcad 10059 if (dump_enabled_p ())
af29617a
AH
10060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10061 "not ssa-name.\n");
ebfd146a
IR
10062 return false;
10063 }
b8698a0f 10064
3fc356dc 10065 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 10066 {
3fc356dc
RB
10067 *dt = vect_external_def;
10068 return true;
ebfd146a
IR
10069 }
10070
894dd753
RS
10071 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10072 if (def_stmt_out)
10073 *def_stmt_out = def_stmt;
73fbfcad 10074 if (dump_enabled_p ())
ebfd146a 10075 {
78c60e3d 10076 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
894dd753 10077 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
ebfd146a
IR
10078 }
10079
894dd753 10080 if (! vect_stmt_in_region_p (vinfo, def_stmt))
8644a673 10081 *dt = vect_external_def;
ebfd146a
IR
10082 else
10083 {
894dd753 10084 stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
603cca93 10085 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
10086 }
10087
2e8ab70c
RB
10088 if (dump_enabled_p ())
10089 {
10090 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
10091 switch (*dt)
10092 {
10093 case vect_uninitialized_def:
10094 dump_printf (MSG_NOTE, "uninitialized\n");
10095 break;
10096 case vect_constant_def:
10097 dump_printf (MSG_NOTE, "constant\n");
10098 break;
10099 case vect_external_def:
10100 dump_printf (MSG_NOTE, "external\n");
10101 break;
10102 case vect_internal_def:
10103 dump_printf (MSG_NOTE, "internal\n");
10104 break;
10105 case vect_induction_def:
10106 dump_printf (MSG_NOTE, "induction\n");
10107 break;
10108 case vect_reduction_def:
10109 dump_printf (MSG_NOTE, "reduction\n");
10110 break;
10111 case vect_double_reduction_def:
10112 dump_printf (MSG_NOTE, "double reduction\n");
10113 break;
10114 case vect_nested_cycle:
10115 dump_printf (MSG_NOTE, "nested cycle\n");
10116 break;
10117 case vect_unknown_def_type:
10118 dump_printf (MSG_NOTE, "unknown\n");
10119 break;
10120 }
10121 }
10122
81c40241 10123 if (*dt == vect_unknown_def_type)
ebfd146a 10124 {
73fbfcad 10125 if (dump_enabled_p ())
78c60e3d 10126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10127 "Unsupported pattern.\n");
ebfd146a
IR
10128 return false;
10129 }
10130
894dd753 10131 switch (gimple_code (def_stmt))
ebfd146a
IR
10132 {
10133 case GIMPLE_PHI:
ebfd146a 10134 case GIMPLE_ASSIGN:
ebfd146a 10135 case GIMPLE_CALL:
81c40241 10136 break;
ebfd146a 10137 default:
73fbfcad 10138 if (dump_enabled_p ())
78c60e3d 10139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10140 "unsupported defining stmt:\n");
ebfd146a
IR
10141 return false;
10142 }
10143
10144 return true;
10145}
10146
81c40241 10147/* Function vect_is_simple_use.
b690cc0f 10148
81c40241 10149 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
10150 type of OPERAND and stores it to *VECTYPE. If the definition of
10151 OPERAND is vect_uninitialized_def, vect_constant_def or
10152 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10153 is responsible to compute the best suited vector type for the
10154 scalar operand. */
10155
10156bool
894dd753
RS
10157vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10158 tree *vectype, gimple **def_stmt_out)
b690cc0f 10159{
894dd753
RS
10160 gimple *def_stmt;
10161 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt))
b690cc0f
RG
10162 return false;
10163
894dd753
RS
10164 if (def_stmt_out)
10165 *def_stmt_out = def_stmt;
10166
b690cc0f
RG
10167 /* Now get a vector type if the def is internal, otherwise supply
10168 NULL_TREE and leave it up to the caller to figure out a proper
10169 type for the use stmt. */
10170 if (*dt == vect_internal_def
10171 || *dt == vect_induction_def
10172 || *dt == vect_reduction_def
10173 || *dt == vect_double_reduction_def
10174 || *dt == vect_nested_cycle)
10175 {
894dd753 10176 stmt_vec_info stmt_info = vinfo_for_stmt (def_stmt);
83197f37
IR
10177
10178 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10179 && !STMT_VINFO_RELEVANT (stmt_info)
10180 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 10181 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 10182
b690cc0f
RG
10183 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10184 gcc_assert (*vectype != NULL_TREE);
10185 }
10186 else if (*dt == vect_uninitialized_def
10187 || *dt == vect_constant_def
10188 || *dt == vect_external_def)
10189 *vectype = NULL_TREE;
10190 else
10191 gcc_unreachable ();
10192
10193 return true;
10194}
10195
ebfd146a
IR
10196
10197/* Function supportable_widening_operation
10198
b8698a0f
L
10199 Check whether an operation represented by the code CODE is a
10200 widening operation that is supported by the target platform in
b690cc0f
RG
10201 vector form (i.e., when operating on arguments of type VECTYPE_IN
10202 producing a result of type VECTYPE_OUT).
b8698a0f 10203
1bda738b
JJ
10204 Widening operations we currently support are NOP (CONVERT), FLOAT,
10205 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10206 are supported by the target platform either directly (via vector
10207 tree-codes), or via target builtins.
ebfd146a
IR
10208
10209 Output:
b8698a0f
L
10210 - CODE1 and CODE2 are codes of vector operations to be used when
10211 vectorizing the operation, if available.
ebfd146a
IR
10212 - MULTI_STEP_CVT determines the number of required intermediate steps in
10213 case of multi-step conversion (like char->short->int - in that case
10214 MULTI_STEP_CVT will be 1).
b8698a0f
L
10215 - INTERM_TYPES contains the intermediate type required to perform the
10216 widening operation (short in the above example). */
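/* For example (illustrative only): widening a vector of chars to a vector
   of ints usually cannot be done in one step; if the target provides the
   VEC_UNPACK_LO/HI_EXPR optabs for char->short and short->int, the
   function sets *CODE1/*CODE2 to the unpack codes, *MULTI_STEP_CVT to 1
   and pushes the short vector type onto *INTERM_TYPES.  */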
ebfd146a
IR
10217
10218bool
355fe088 10219supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10220 tree vectype_out, tree vectype_in,
ebfd146a
IR
10221 enum tree_code *code1, enum tree_code *code2,
10222 int *multi_step_cvt,
9771b263 10223 vec<tree> *interm_types)
ebfd146a
IR
10224{
10225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10226 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10227 struct loop *vect_loop = NULL;
ef4bddc2 10228 machine_mode vec_mode;
81f40b79 10229 enum insn_code icode1, icode2;
ebfd146a 10230 optab optab1, optab2;
b690cc0f
RG
10231 tree vectype = vectype_in;
10232 tree wide_vectype = vectype_out;
ebfd146a 10233 enum tree_code c1, c2;
4a00c761
JJ
10234 int i;
10235 tree prev_type, intermediate_type;
ef4bddc2 10236 machine_mode intermediate_mode, prev_mode;
4a00c761 10237 optab optab3, optab4;
ebfd146a 10238
4a00c761 10239 *multi_step_cvt = 0;
4ef69dfc
IR
10240 if (loop_info)
10241 vect_loop = LOOP_VINFO_LOOP (loop_info);
10242
ebfd146a
IR
10243 switch (code)
10244 {
10245 case WIDEN_MULT_EXPR:
6ae6116f
RH
10246 /* The result of a vectorized widening operation usually requires
10247 two vectors (because the widened results do not fit into one vector).
10248 The generated vector results would normally be expected to be
10249 generated in the same order as in the original scalar computation,
10250 i.e. if 8 results are generated in each vector iteration, they are
10251 to be organized as follows:
10252 vect1: [res1,res2,res3,res4],
10253 vect2: [res5,res6,res7,res8].
10254
10255 However, in the special case that the result of the widening
10256 operation is used in a reduction computation only, the order doesn't
10257 matter (because when vectorizing a reduction we change the order of
10258 the computation). Some targets can take advantage of this and
10259 generate more efficient code. For example, targets like Altivec,
10260 that support widen_mult using a sequence of {mult_even,mult_odd}
10261 generate the following vectors:
10262 vect1: [res1,res3,res5,res7],
10263 vect2: [res2,res4,res6,res8].
10264
10265 When vectorizing outer-loops, we execute the inner-loop sequentially
10266 (each vectorized inner-loop iteration contributes to VF outer-loop
 10267	 iterations in parallel).  We therefore don't allow changing the
10268 order of the computation in the inner-loop during outer-loop
10269 vectorization. */
10270 /* TODO: Another case in which order doesn't *really* matter is when we
10271 widen and then contract again, e.g. (short)((int)x * y >> 8).
10272 Normally, pack_trunc performs an even/odd permute, whereas the
10273 repack from an even/odd expansion would be an interleave, which
10274 would be significantly simpler for e.g. AVX2. */
10275 /* In any case, in order to avoid duplicating the code below, recurse
10276 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10277 are properly set up for the caller. If we fail, we'll continue with
10278 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10279 if (vect_loop
10280 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10281 && !nested_in_vect_loop_p (vect_loop, stmt)
10282 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10283 stmt, vectype_out, vectype_in,
a86ec597
RH
10284 code1, code2, multi_step_cvt,
10285 interm_types))
ebc047a2
CH
10286 {
10287 /* Elements in a vector with vect_used_by_reduction property cannot
10288 be reordered if the use chain with this property does not have the
10289 same operation. One such an example is s += a * b, where elements
10290 in a and b cannot be reordered. Here we check if the vector defined
10291 by STMT is only directly used in the reduction statement. */
10292 tree lhs = gimple_assign_lhs (stmt);
10293 use_operand_p dummy;
355fe088 10294 gimple *use_stmt;
ebc047a2
CH
10295 stmt_vec_info use_stmt_info = NULL;
10296 if (single_imm_use (lhs, &dummy, &use_stmt)
10297 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10298 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10299 return true;
10300 }
4a00c761
JJ
10301 c1 = VEC_WIDEN_MULT_LO_EXPR;
10302 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
10303 break;
10304
81c40241
RB
10305 case DOT_PROD_EXPR:
10306 c1 = DOT_PROD_EXPR;
10307 c2 = DOT_PROD_EXPR;
10308 break;
10309
10310 case SAD_EXPR:
10311 c1 = SAD_EXPR;
10312 c2 = SAD_EXPR;
10313 break;
10314
6ae6116f
RH
10315 case VEC_WIDEN_MULT_EVEN_EXPR:
10316 /* Support the recursion induced just above. */
10317 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10318 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10319 break;
10320
36ba4aae 10321 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
10322 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10323 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
10324 break;
10325
ebfd146a 10326 CASE_CONVERT:
4a00c761
JJ
10327 c1 = VEC_UNPACK_LO_EXPR;
10328 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
10329 break;
10330
10331 case FLOAT_EXPR:
4a00c761
JJ
10332 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10333 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
10334 break;
10335
10336 case FIX_TRUNC_EXPR:
1bda738b
JJ
10337 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10338 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10339 break;
ebfd146a
IR
10340
10341 default:
10342 gcc_unreachable ();
10343 }
10344
6ae6116f 10345 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10346 std::swap (c1, c2);
4a00c761 10347
ebfd146a
IR
10348 if (code == FIX_TRUNC_EXPR)
10349 {
10350 /* The signedness is determined from output operand. */
b690cc0f
RG
10351 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10352 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
10353 }
10354 else
10355 {
10356 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10357 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10358 }
10359
10360 if (!optab1 || !optab2)
10361 return false;
10362
10363 vec_mode = TYPE_MODE (vectype);
947131ba
RS
10364 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10365 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10366 return false;
10367
4a00c761
JJ
10368 *code1 = c1;
10369 *code2 = c2;
10370
10371 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10372 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
10373 /* For scalar masks we may have different boolean
10374 vector types having the same QImode. Thus we
 10375 	   add an additional check for the number of elements.  */
10376 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10377 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10378 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10379
b8698a0f 10380 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10381 types. */
ebfd146a 10382
4a00c761
JJ
10383 prev_type = vectype;
10384 prev_mode = vec_mode;
b8698a0f 10385
4a00c761
JJ
10386 if (!CONVERT_EXPR_CODE_P (code))
10387 return false;
b8698a0f 10388
4a00c761
JJ
10389 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 10390     intermediate steps in the promotion sequence.  We try
 10391     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10392 not. */
9771b263 10393 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10394 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10395 {
10396 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10397 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10398 {
7cfb4d93 10399 intermediate_type = vect_halve_mask_nunits (prev_type);
3ae0661a
IE
10400 if (intermediate_mode != TYPE_MODE (intermediate_type))
10401 return false;
10402 }
10403 else
10404 intermediate_type
10405 = lang_hooks.types.type_for_mode (intermediate_mode,
10406 TYPE_UNSIGNED (prev_type));
10407
4a00c761
JJ
10408 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10409 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10410
10411 if (!optab3 || !optab4
10412 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10413 || insn_data[icode1].operand[0].mode != intermediate_mode
10414 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10415 || insn_data[icode2].operand[0].mode != intermediate_mode
10416 || ((icode1 = optab_handler (optab3, intermediate_mode))
10417 == CODE_FOR_nothing)
10418 || ((icode2 = optab_handler (optab4, intermediate_mode))
10419 == CODE_FOR_nothing))
10420 break;
ebfd146a 10421
9771b263 10422 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10423 (*multi_step_cvt)++;
10424
10425 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10426 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10427 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10428 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10429 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
10430
10431 prev_type = intermediate_type;
10432 prev_mode = intermediate_mode;
ebfd146a
IR
10433 }
10434
9771b263 10435 interm_types->release ();
4a00c761 10436 return false;
ebfd146a
IR
10437}
10438
10439
10440/* Function supportable_narrowing_operation
10441
b8698a0f
L
10442 Check whether an operation represented by the code CODE is a
10443 narrowing operation that is supported by the target platform in
b690cc0f
RG
10444 vector form (i.e., when operating on arguments of type VECTYPE_IN
10445 and producing a result of type VECTYPE_OUT).
b8698a0f 10446
1bda738b
JJ
10447 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10448 and FLOAT. This function checks if these operations are supported by
ebfd146a
IR
10449 the target platform directly via vector tree-codes.
10450
10451 Output:
b8698a0f
L
10452 - CODE1 is the code of a vector operation to be used when
10453 vectorizing the operation, if available.
ebfd146a
IR
10454 - MULTI_STEP_CVT determines the number of required intermediate steps in
10455 case of multi-step conversion (like int->short->char - in that case
10456 MULTI_STEP_CVT will be 1).
10457 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10458 narrowing operation (short in the above example). */
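/* For example (illustrative only): narrowing a vector of ints to a vector
   of chars uses VEC_PACK_TRUNC_EXPR; if the target can only pack one step
   at a time, the int->short->char sequence is described by *CODE1 ==
   VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT == 1 and the short vector type in
   *INTERM_TYPES.  */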
ebfd146a
IR
10459
10460bool
10461supportable_narrowing_operation (enum tree_code code,
b690cc0f 10462 tree vectype_out, tree vectype_in,
ebfd146a 10463 enum tree_code *code1, int *multi_step_cvt,
9771b263 10464 vec<tree> *interm_types)
ebfd146a 10465{
ef4bddc2 10466 machine_mode vec_mode;
ebfd146a
IR
10467 enum insn_code icode1;
10468 optab optab1, interm_optab;
b690cc0f
RG
10469 tree vectype = vectype_in;
10470 tree narrow_vectype = vectype_out;
ebfd146a 10471 enum tree_code c1;
3ae0661a 10472 tree intermediate_type, prev_type;
ef4bddc2 10473 machine_mode intermediate_mode, prev_mode;
ebfd146a 10474 int i;
4a00c761 10475 bool uns;
ebfd146a 10476
4a00c761 10477 *multi_step_cvt = 0;
ebfd146a
IR
10478 switch (code)
10479 {
10480 CASE_CONVERT:
10481 c1 = VEC_PACK_TRUNC_EXPR;
10482 break;
10483
10484 case FIX_TRUNC_EXPR:
10485 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10486 break;
10487
10488 case FLOAT_EXPR:
1bda738b
JJ
10489 c1 = VEC_PACK_FLOAT_EXPR;
10490 break;
ebfd146a
IR
10491
10492 default:
10493 gcc_unreachable ();
10494 }
10495
10496 if (code == FIX_TRUNC_EXPR)
10497 /* The signedness is determined from output operand. */
b690cc0f 10498 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
10499 else
10500 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10501
10502 if (!optab1)
10503 return false;
10504
10505 vec_mode = TYPE_MODE (vectype);
947131ba 10506 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10507 return false;
10508
4a00c761
JJ
10509 *code1 = c1;
10510
10511 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
10512 /* For scalar masks we may have different boolean
10513 vector types having the same QImode. Thus we
 10514 	   add an additional check for the number of elements.  */
10515 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10516 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10517 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10518
1bda738b
JJ
10519 if (code == FLOAT_EXPR)
10520 return false;
10521
ebfd146a
IR
10522 /* Check if it's a multi-step conversion that can be done using intermediate
10523 types. */
4a00c761 10524 prev_mode = vec_mode;
3ae0661a 10525 prev_type = vectype;
4a00c761
JJ
10526 if (code == FIX_TRUNC_EXPR)
10527 uns = TYPE_UNSIGNED (vectype_out);
10528 else
10529 uns = TYPE_UNSIGNED (vectype);
10530
10531 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10532 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10533 costly than signed. */
10534 if (code == FIX_TRUNC_EXPR && uns)
10535 {
10536 enum insn_code icode2;
10537
10538 intermediate_type
10539 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10540 interm_optab
10541 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10542 if (interm_optab != unknown_optab
4a00c761
JJ
10543 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10544 && insn_data[icode1].operand[0].mode
10545 == insn_data[icode2].operand[0].mode)
10546 {
10547 uns = false;
10548 optab1 = interm_optab;
10549 icode1 = icode2;
10550 }
10551 }
ebfd146a 10552
4a00c761
JJ
10553 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
 10554     intermediate steps in the narrowing sequence.  We try
10555 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10556 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10557 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10558 {
10559 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10560 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10561 {
7cfb4d93 10562 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10563 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10564 return false;
3ae0661a
IE
10565 }
10566 else
10567 intermediate_type
10568 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
10569 interm_optab
10570 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10571 optab_default);
10572 if (!interm_optab
10573 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10574 || insn_data[icode1].operand[0].mode != intermediate_mode
10575 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10576 == CODE_FOR_nothing))
10577 break;
10578
9771b263 10579 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10580 (*multi_step_cvt)++;
10581
10582 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10583 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10584 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10585 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
10586
10587 prev_mode = intermediate_mode;
3ae0661a 10588 prev_type = intermediate_type;
4a00c761 10589 optab1 = interm_optab;
ebfd146a
IR
10590 }
10591
9771b263 10592 interm_types->release ();
4a00c761 10593 return false;
ebfd146a 10594}
7cfb4d93
RS
10595
10596/* Generate and return a statement that sets vector mask MASK such that
10597 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
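/* A sketch of what the built statement looks like in GIMPLE dumps
   (assuming a target that supports IFN_WHILE_ULT, e.g. SVE):

       mask_5 = .WHILE_ULT (start_index, end_index, { 0, ... });

   i.e. mask element I is active while START_INDEX + I < END_INDEX.  */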
10598
10599gcall *
10600vect_gen_while (tree mask, tree start_index, tree end_index)
10601{
10602 tree cmp_type = TREE_TYPE (start_index);
10603 tree mask_type = TREE_TYPE (mask);
10604 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10605 cmp_type, mask_type,
10606 OPTIMIZE_FOR_SPEED));
10607 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10608 start_index, end_index,
10609 build_zero_cst (mask_type));
10610 gimple_call_set_lhs (call, mask);
10611 return call;
10612}
535e7c11
RS
10613
10614/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10615 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10616
10617tree
10618vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10619 tree end_index)
10620{
10621 tree tmp = make_ssa_name (mask_type);
10622 gcall *call = vect_gen_while (tmp, start_index, end_index);
10623 gimple_seq_add_stmt (seq, call);
10624 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10625}
1f3cb663
RS
10626
10627/* Try to compute the vector types required to vectorize STMT_INFO,
10628 returning true on success and false if vectorization isn't possible.
10629
10630 On success:
10631
10632 - Set *STMT_VECTYPE_OUT to:
10633 - NULL_TREE if the statement doesn't need to be vectorized;
10634 - boolean_type_node if the statement is a boolean operation whose
10635 vector type can only be determined once all the other vector types
10636 are known; and
10637 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10638
10639 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10640 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10641 statement does not help to determine the overall number of units. */
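/* For example (illustrative only): for a comparison flag_1 = a_2 < b_3
   with int operands, *STMT_VECTYPE_OUT is set to boolean_type_node (the
   mask type is only chosen once the other vector types are known), while
   *NUNITS_VECTYPE_OUT is the vector type for int, which is what
   determines the number of units.  */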
10642
10643bool
10644vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10645 tree *stmt_vectype_out,
10646 tree *nunits_vectype_out)
10647{
10648 gimple *stmt = stmt_info->stmt;
10649
10650 *stmt_vectype_out = NULL_TREE;
10651 *nunits_vectype_out = NULL_TREE;
10652
10653 if (gimple_get_lhs (stmt) == NULL_TREE
10654 /* MASK_STORE has no lhs, but is ok. */
10655 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10656 {
10657 if (is_a <gcall *> (stmt))
10658 {
10659 /* Ignore calls with no lhs. These must be calls to
10660 #pragma omp simd functions, and what vectorization factor
10661 it really needs can't be determined until
10662 vectorizable_simd_clone_call. */
10663 if (dump_enabled_p ())
10664 dump_printf_loc (MSG_NOTE, vect_location,
10665 "defer to SIMD clone analysis.\n");
10666 return true;
10667 }
10668
10669 if (dump_enabled_p ())
10670 {
10671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10672 "not vectorized: irregular stmt.");
10673 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10674 }
10675 return false;
10676 }
10677
10678 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10679 {
10680 if (dump_enabled_p ())
10681 {
10682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10683 "not vectorized: vector stmt in loop:");
10684 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10685 }
10686 return false;
10687 }
10688
10689 tree vectype;
10690 tree scalar_type = NULL_TREE;
10691 if (STMT_VINFO_VECTYPE (stmt_info))
10692 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10693 else
10694 {
10695 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10696 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10697 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10698 else
10699 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10700
10701 /* Pure bool ops don't participate in number-of-units computation.
10702 For comparisons use the types being compared. */
10703 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10704 && is_gimple_assign (stmt)
10705 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10706 {
10707 *stmt_vectype_out = boolean_type_node;
10708
10709 tree rhs1 = gimple_assign_rhs1 (stmt);
10710 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10711 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10712 scalar_type = TREE_TYPE (rhs1);
10713 else
10714 {
10715 if (dump_enabled_p ())
10716 dump_printf_loc (MSG_NOTE, vect_location,
10717 "pure bool operation.\n");
10718 return true;
10719 }
10720 }
10721
10722 if (dump_enabled_p ())
10723 {
10724 dump_printf_loc (MSG_NOTE, vect_location,
10725 "get vectype for scalar type: ");
10726 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10727 dump_printf (MSG_NOTE, "\n");
10728 }
10729 vectype = get_vectype_for_scalar_type (scalar_type);
10730 if (!vectype)
10731 {
10732 if (dump_enabled_p ())
10733 {
10734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10735 "not vectorized: unsupported data-type ");
10736 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10737 scalar_type);
10738 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10739 }
10740 return false;
10741 }
10742
10743 if (!*stmt_vectype_out)
10744 *stmt_vectype_out = vectype;
10745
10746 if (dump_enabled_p ())
10747 {
10748 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10749 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10750 dump_printf (MSG_NOTE, "\n");
10751 }
10752 }
10753
10754 /* Don't try to compute scalar types if the stmt produces a boolean
10755 vector; use the existing vector type instead. */
10756 tree nunits_vectype;
10757 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10758 nunits_vectype = vectype;
10759 else
10760 {
10761 /* The number of units is set according to the smallest scalar
10762 type (or the largest vector size, but we only support one
10763 vector size per vectorization). */
10764 if (*stmt_vectype_out != boolean_type_node)
10765 {
10766 HOST_WIDE_INT dummy;
10767 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10768 }
10769 if (dump_enabled_p ())
10770 {
10771 dump_printf_loc (MSG_NOTE, vect_location,
10772 "get vectype for scalar type: ");
10773 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10774 dump_printf (MSG_NOTE, "\n");
10775 }
10776 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10777 }
10778 if (!nunits_vectype)
10779 {
10780 if (dump_enabled_p ())
10781 {
10782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10783 "not vectorized: unsupported data-type ");
10784 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10785 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10786 }
10787 return false;
10788 }
10789
10790 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10791 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10792 {
10793 if (dump_enabled_p ())
10794 {
10795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10796 "not vectorized: different sized vector "
10797 "types in statement, ");
10798 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10799 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10800 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10801 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10802 }
10803 return false;
10804 }
10805
10806 if (dump_enabled_p ())
10807 {
10808 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10809 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10810 dump_printf (MSG_NOTE, "\n");
10811
10812 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10813 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10814 dump_printf (MSG_NOTE, "\n");
10815 }
10816
10817 *nunits_vectype_out = nunits_vectype;
10818 return true;
10819}
10820
10821/* Try to determine the correct vector type for STMT_INFO, which is a
10822 statement that produces a scalar boolean result. Return the vector
10823 type on success, otherwise return NULL_TREE. */
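/* For example (illustrative only): for mask_1 = a_2 < b_3 with int
   operands, the result is the mask type returned by
   get_mask_type_for_scalar_type for int, i.e. the boolean vector type
   matching an int vector comparison.  */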
10824
10825tree
10826vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10827{
10828 gimple *stmt = stmt_info->stmt;
10829 tree mask_type = NULL;
10830 tree vectype, scalar_type;
10831
10832 if (is_gimple_assign (stmt)
10833 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10834 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10835 {
10836 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10837 mask_type = get_mask_type_for_scalar_type (scalar_type);
10838
10839 if (!mask_type)
10840 {
10841 if (dump_enabled_p ())
10842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10843 "not vectorized: unsupported mask\n");
10844 return NULL_TREE;
10845 }
10846 }
10847 else
10848 {
10849 tree rhs;
10850 ssa_op_iter iter;
1f3cb663
RS
10851 enum vect_def_type dt;
10852
10853 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10854 {
894dd753 10855 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
1f3cb663
RS
10856 {
10857 if (dump_enabled_p ())
10858 {
10859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10860 "not vectorized: can't compute mask type "
10861 "for statement, ");
10862 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10863 0);
10864 }
10865 return NULL_TREE;
10866 }
10867
10868 /* No vectype probably means external definition.
 10869		Allow it in case there is another operand from which
 10870		the mask type can be determined.  */
10871 if (!vectype)
10872 continue;
10873
10874 if (!mask_type)
10875 mask_type = vectype;
10876 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10877 TYPE_VECTOR_SUBPARTS (vectype)))
10878 {
10879 if (dump_enabled_p ())
10880 {
10881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10882 "not vectorized: different sized masks "
10883 "types in statement, ");
10884 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10885 mask_type);
10886 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10887 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10888 vectype);
10889 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10890 }
10891 return NULL_TREE;
10892 }
10893 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10894 != VECTOR_BOOLEAN_TYPE_P (vectype))
10895 {
10896 if (dump_enabled_p ())
10897 {
10898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10899 "not vectorized: mixed mask and "
10900 "nonmask vector types in statement, ");
10901 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10902 mask_type);
10903 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10904 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10905 vectype);
10906 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10907 }
10908 return NULL_TREE;
10909 }
10910 }
10911
 10912    /* We may compare a boolean value loaded as a vector of integers.
 10913       Fix mask_type in such a case.  */
10914 if (mask_type
10915 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10916 && gimple_code (stmt) == GIMPLE_ASSIGN
10917 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10918 mask_type = build_same_sized_truth_vector_type (mask_type);
10919 }
10920
10921 /* No mask_type should mean loop invariant predicate.
10922 This is probably a subject for optimization in if-conversion. */
10923 if (!mask_type && dump_enabled_p ())
10924 {
10925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10926 "not vectorized: can't compute mask type "
10927 "for statement, ");
10928 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10929 }
10930 return mask_type;
10931}