/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

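/* Editor's usage sketch (not part of the original sources): a caller
   accounting for two unaligned vector loads in the loop body, given some
   data_reference DR, might write

     unsigned c = record_stmt_cost (cost_vec, 2, unaligned_load, stmt_info,
                                    DR_MISALIGNMENT (dr), vect_body);

   The entry is pushed onto the cost vector for the target cost model to
   process later; the return value is only a preliminary estimate.  */
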
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (stmt, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (stmt, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (stmt, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


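/* Editor's illustration (not part of the original sources): for the scalar
   statement  a[i_1] = x_2,  the use i_1 only feeds the array index, so
   exist_non_indexing_operands_for_use_p returns false for it and the
   definition of i_1 need not be vectorized, whereas x_2 is the stored
   value, a real operand, and the function returns true for that use.  */
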
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &def_stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.\n");
              return false;
            }
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.\n");

              return false;
            }
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.\n");

              return false;
            }
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
            gcc_unreachable ();
          if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}

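/* Editor's illustration (not part of the original sources): in a loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the store to a[i] is marked relevant because it has a vdef; processing
   its uses on the worklist then marks the addition and, through it, the
   load from b[i] as vect_used_in_scope, while the uses of i feed only
   address computation (case 1 of process_use) and its increment on the
   PHI backedge is deliberately left unmarked.  */
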
/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
                               unsigned opno, enum vect_def_type dt,
                               stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
      /* ??? We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
                                 opno))
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          /* ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          prologue_cost += record_stmt_cost
              (cost_vec, 1,
               dt == vect_external_def
               ? (elt ? scalar_to_vec : vec_construct)
               : vector_load,
               stmt_info, 0, vect_prologue);
          nelt = 0;
        }
    }

  return prologue_cost;
}

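/* Editor's note (illustrative, not part of the original sources): for an
   SLP node of four scalar stores whose stored operand is the same external
   SSA name, the loop above sees all NELT_LIMIT operands equal and records
   one scalar_to_vec (a splat) in the prologue; four distinct external
   values would instead be costed as one vec_construct, and constant
   operands as a single vector_load from the constant pool.  */
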
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
         ??? This over-estimates cost for multiple uses and should be
         re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
        {
          tree op = gimple_op (stmt, i);
          enum vect_def_type dt;
          if (!op || op == lhs)
            continue;
          if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
              && (dt == vect_constant_def || dt == vect_external_def))
            prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
                                                            i, dt, cost_vec);
        }
    }
  else
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
                                       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr,
                                    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
                                       vec_promote_demote, stmt_info, 0,
                                       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

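/* Editor's worked example (not part of the original sources): for a
   two-step promotion such as char -> int, PWR is 1 and the loop above
   records vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   operations per scalar-stmt copy (two unpacks to the intermediate type,
   then four to the final one); the corresponding two-step demotion is
   costed as 1 + 2 = 3 operations.  */
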
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt,
                       vect_memory_access_type memory_access_type,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
        prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
                                                        1, dt, cost_vec);
      else
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

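/* Editor's worked example (not part of the original sources): for an
   interleaved store group with DR_GROUP_SIZE 4 accessed via
   VMAT_CONTIGUOUS_PERMUTE, the code above counts
   ncopies * ceil_log2 (4) * 4 = ncopies * 8 vec_perm operations, i.e.
   two rounds of high/low interleaves over the four vectors before the
   wide stores are issued.  */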

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
                      vect_memory_access_type memory_access_type,
                      slp_instance instance,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
        = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
                                    slp_vf, instance, true,
                                    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
           i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
        bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
        {
          if (i % assumed_nunits == 0)
            {
              if (load_seen)
                ncopies++;
              load_seen = false;
            }
          if (bitmap_bit_p (perm, i))
            load_seen = true;
        }
      if (load_seen)
        ncopies++;
      gcc_assert (ncopies
                  <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
                      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                init_stmt = gimple_build_assign (new_temp,
                                                 fold_build1 (VIEW_CONVERT_EXPR,
                                                              TREE_TYPE (type),
                                                              val));
              else
                init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

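/* Editor's usage sketch (not part of the original sources): a caller that
   needs a vector of copies of an invariant scalar X, e.g. when vectorizing
   a[i] = b[i] + x, can simply do

     tree vec_x = vect_init_vector (stmt, x, vectype, NULL);

   Passing a NULL iterator makes vect_init_vector_1 emit the splat on the
   loop preheader edge, so it executes only once per loop.  */
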
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


c83a894c
AH
1537/* Function vect_get_vec_def_for_operand.
1538
1539 OP is an operand in STMT. This function returns a (vector) def that will be
1540 used in the vectorized stmt for STMT.
1541
1542 In the case that OP is an SSA_NAME which is defined in the loop, then
1543 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1544
1545 In case OP is an invariant or constant, a new stmt that creates a vector def
1546 needs to be introduced. VECTYPE may be used to specify a required type for
1547 vector invariant. */
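/* As a rough illustration of the two cases above (example values only):
   for an invariant operand such as the constant 5 in "x = a[i] + 5", a
   new vector def like { 5, 5, 5, 5 } is built with vect_init_vector;
   for an operand defined by another stmt in the loop, the def already
   recorded in STMT_VINFO_VEC_STMT of that stmt's vectorized form is
   reused.  */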
1548
1549tree
1550vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1551{
1552 gimple *def_stmt;
1553 enum vect_def_type dt;
1554 bool is_simple_use;
1555 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1556 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1557
1558 if (dump_enabled_p ())
1559 {
1560 dump_printf_loc (MSG_NOTE, vect_location,
1561 "vect_get_vec_def_for_operand: ");
1562 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1563 dump_printf (MSG_NOTE, "\n");
1564 }
1565
894dd753 1566 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt, &def_stmt);
c83a894c
AH
1567 gcc_assert (is_simple_use);
1568 if (def_stmt && dump_enabled_p ())
1569 {
1570 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1571 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1572 }
1573
1574 if (dt == vect_constant_def || dt == vect_external_def)
1575 {
1576 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1577 tree vector_type;
1578
1579 if (vectype)
1580 vector_type = vectype;
2568d8a1 1581 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1582 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1583 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1584 else
1585 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1586
1587 gcc_assert (vector_type);
1588 return vect_init_vector (stmt, op, vector_type, NULL);
1589 }
1590 else
1591 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1592}
1593
1594
ebfd146a
IR
1595/* Function vect_get_vec_def_for_stmt_copy
1596
ff802fa1 1597 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1598 vectorized stmt to be created (by the caller to this function) is a "copy"
 1599 created because the vectorized result cannot fit in one vector, and several
ff802fa1 1600 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1601 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1602 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1603 DT is the type of the vector def VEC_OPRND.
1604
1605 Context:
1606 In case the vectorization factor (VF) is bigger than the number
1607 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1608 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1609 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1610 smallest data-type determines the VF, and as a result, when vectorizing
1611 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1612 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1613 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1614 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1615 which VF=16 and nunits=4, so the number of copies required is 4):
1616
1617 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1618
ebfd146a
IR
1619 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1620 VS1.1: vx.1 = memref1 VS1.2
1621 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1622 VS1.3: vx.3 = memref3
ebfd146a
IR
1623
1624 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1625 VSnew.1: vz1 = vx.1 + ... VSnew.2
1626 VSnew.2: vz2 = vx.2 + ... VSnew.3
1627 VSnew.3: vz3 = vx.3 + ...
1628
1629 The vectorization of S1 is explained in vectorizable_load.
1630 The vectorization of S2:
b8698a0f
L
1631 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1632 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1633 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1634 returns the vector-def 'vx.0'.
1635
b8698a0f
L
1636 To create the remaining copies of the vector-stmt (VSnew.j), this
1637 function is called to get the relevant vector-def for each operand. It is
1638 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1639 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1640
b8698a0f
L
1641 For example, to obtain the vector-def 'vx.1' in order to create the
1642 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
 1643 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1644 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1645 and return its def ('vx.1').
1646 Overall, to create the above sequence this function will be called 3 times:
1647 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1648 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1649 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1650
1651tree
1652vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1653{
355fe088 1654 gimple *vec_stmt_for_operand;
ebfd146a
IR
1655 stmt_vec_info def_stmt_info;
1656
1657 /* Do nothing; can reuse same def. */
8644a673 1658 if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1659 return vec_oprnd;
1660
1661 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1662 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1663 gcc_assert (def_stmt_info);
1664 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1665 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1666 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1667 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1668 else
1669 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1670 return vec_oprnd;
1671}
1672
1673
1674/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1675 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1676
c78e3652 1677void
b8698a0f 1678vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1679 vec<tree> *vec_oprnds0,
1680 vec<tree> *vec_oprnds1)
ebfd146a 1681{
9771b263 1682 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1683
1684 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1685 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1686
9771b263 1687 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1688 {
9771b263 1689 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1690 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1691 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1692 }
1693}
1694
1695
c78e3652 1696/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1697
c78e3652 1698void
355fe088 1699vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1700 vec<tree> *vec_oprnds0,
1701 vec<tree> *vec_oprnds1,
306b0c92 1702 slp_tree slp_node)
ebfd146a
IR
1703{
1704 if (slp_node)
d092494c
IR
1705 {
1706 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1707 auto_vec<tree> ops (nops);
1708 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1709
9771b263 1710 ops.quick_push (op0);
d092494c 1711 if (op1)
9771b263 1712 ops.quick_push (op1);
d092494c 1713
306b0c92 1714 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1715
37b5ec8f 1716 *vec_oprnds0 = vec_defs[0];
d092494c 1717 if (op1)
37b5ec8f 1718 *vec_oprnds1 = vec_defs[1];
d092494c 1719 }
ebfd146a
IR
1720 else
1721 {
1722 tree vec_oprnd;
1723
9771b263 1724 vec_oprnds0->create (1);
81c40241 1725 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1726 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1727
1728 if (op1)
1729 {
9771b263 1730 vec_oprnds1->create (1);
81c40241 1731 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1732 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1733 }
1734 }
1735}
1736
bb6c2b68
RS
1737/* Helper function called by vect_finish_replace_stmt and
1738 vect_finish_stmt_generation. Set the location of the new
1739 statement and create a stmt_vec_info for it. */
1740
1741static void
1742vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1743{
1744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1745 vec_info *vinfo = stmt_info->vinfo;
1746
1747 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1748
1749 if (dump_enabled_p ())
1750 {
1751 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1752 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1753 }
1754
1755 gimple_set_location (vec_stmt, gimple_location (stmt));
1756
1757 /* While EH edges will generally prevent vectorization, stmt might
1758 e.g. be in a must-not-throw region. Ensure newly created stmts
1759 that could throw are part of the same region. */
1760 int lp_nr = lookup_stmt_eh_lp (stmt);
1761 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1762 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1763}
1764
1765/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1766 which sets the same scalar result as STMT did. */
1767
1768void
1769vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1770{
1771 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1772
1773 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1774 gsi_replace (&gsi, vec_stmt, false);
1775
1776 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1777}
ebfd146a
IR
1778
1779/* Function vect_finish_stmt_generation.
1780
 1781 Insert vectorized stmt VEC_STMT for scalar stmt STMT before GSI. */
1782
1783void
355fe088 1784vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1785 gimple_stmt_iterator *gsi)
1786{
ebfd146a
IR
1787 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1788
54e8e2c3
RG
1789 if (!gsi_end_p (*gsi)
1790 && gimple_has_mem_ops (vec_stmt))
1791 {
355fe088 1792 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1793 tree vuse = gimple_vuse (at_stmt);
1794 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1795 {
1796 tree vdef = gimple_vdef (at_stmt);
1797 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1798 /* If we have an SSA vuse and insert a store, update virtual
1799 SSA form to avoid triggering the renamer. Do so only
1800 if we can easily see all uses - which is what almost always
1801 happens with the way vectorized stmts are inserted. */
1802 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1803 && ((is_gimple_assign (vec_stmt)
1804 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1805 || (is_gimple_call (vec_stmt)
1806 && !(gimple_call_flags (vec_stmt)
1807 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1808 {
1809 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1810 gimple_set_vdef (vec_stmt, new_vdef);
1811 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1812 }
1813 }
1814 }
ebfd146a 1815 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
bb6c2b68 1816 vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1817}
1818
70439f0d
RS
1819/* We want to vectorize a call to combined function CFN with function
1820 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1821 as the types of all inputs. Check whether this is possible using
1822 an internal function, returning its code if so or IFN_LAST if not. */
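/* A sketch of the intended use (types assumed): a call that maps to
   CFN_SQRT with V2DF as both VECTYPE_OUT and VECTYPE_IN is only
   vectorizable through this path if IFN_SQRT is a direct internal
   function and direct_internal_fn_supported_p reports target support
   for that type pair; otherwise IFN_LAST is returned and the caller
   looks for another way to vectorize the call.  */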
ebfd146a 1823
70439f0d
RS
1824static internal_fn
1825vectorizable_internal_function (combined_fn cfn, tree fndecl,
1826 tree vectype_out, tree vectype_in)
ebfd146a 1827{
70439f0d
RS
1828 internal_fn ifn;
1829 if (internal_fn_p (cfn))
1830 ifn = as_internal_fn (cfn);
1831 else
1832 ifn = associated_internal_fn (fndecl);
1833 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1834 {
1835 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1836 if (info.vectorizable)
1837 {
1838 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1839 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1840 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1841 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1842 return ifn;
1843 }
1844 }
1845 return IFN_LAST;
ebfd146a
IR
1846}
1847
5ce9450f 1848
355fe088 1849static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1850 gimple_stmt_iterator *);
1851
7cfb4d93
RS
1852/* Check whether a load or store statement in the loop described by
1853 LOOP_VINFO is possible in a fully-masked loop. This is testing
1854 whether the vectorizer pass has the appropriate support, as well as
1855 whether the target does.
1856
1857 VLS_TYPE says whether the statement is a load or store and VECTYPE
1858 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1859 says how the load or store is going to be implemented and GROUP_SIZE
1860 is the number of load or store statements in the containing group.
bfaa08b7
RS
1861 If the access is a gather load or scatter store, GS_INFO describes
1862 its arguments.
7cfb4d93
RS
1863
1864 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1865 supported, otherwise record the required mask types. */
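/* Worked example (figures assumed): for an unmasked contiguous load of
   V8HI with GROUP_SIZE 1, the check below reduces to asking whether the
   target provides a masked load for the corresponding mask mode (via
   can_vec_mask_load_store_p); if so, the required number of loop masks
   is recorded with vect_record_loop_mask, and if not,
   LOOP_VINFO_CAN_FULLY_MASK_P is cleared.  */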
1866
1867static void
1868check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1869 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1870 vect_memory_access_type memory_access_type,
1871 gather_scatter_info *gs_info)
7cfb4d93
RS
1872{
1873 /* Invariant loads need no special support. */
1874 if (memory_access_type == VMAT_INVARIANT)
1875 return;
1876
1877 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1878 machine_mode vecmode = TYPE_MODE (vectype);
1879 bool is_load = (vls_type == VLS_LOAD);
1880 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1881 {
1882 if (is_load
1883 ? !vect_load_lanes_supported (vectype, group_size, true)
1884 : !vect_store_lanes_supported (vectype, group_size, true))
1885 {
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "can't use a fully-masked loop because the"
1889 " target doesn't have an appropriate masked"
1890 " load/store-lanes instruction.\n");
1891 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1892 return;
1893 }
1894 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1895 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1896 return;
1897 }
1898
bfaa08b7
RS
1899 if (memory_access_type == VMAT_GATHER_SCATTER)
1900 {
f307441a
RS
1901 internal_fn ifn = (is_load
1902 ? IFN_MASK_GATHER_LOAD
1903 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1904 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1905 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1906 gs_info->memory_type,
1907 TYPE_SIGN (offset_type),
1908 gs_info->scale))
1909 {
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1912 "can't use a fully-masked loop because the"
1913 " target doesn't have an appropriate masked"
f307441a 1914 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1915 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1916 return;
1917 }
1918 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1919 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1920 return;
1921 }
1922
7cfb4d93
RS
1923 if (memory_access_type != VMAT_CONTIGUOUS
1924 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1925 {
1926 /* Element X of the data must come from iteration i * VF + X of the
1927 scalar loop. We need more work to support other mappings. */
1928 if (dump_enabled_p ())
1929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1930 "can't use a fully-masked loop because an access"
1931 " isn't contiguous.\n");
1932 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1933 return;
1934 }
1935
1936 machine_mode mask_mode;
1937 if (!(targetm.vectorize.get_mask_mode
1938 (GET_MODE_NUNITS (vecmode),
1939 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1940 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1941 {
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "can't use a fully-masked loop because the target"
1945 " doesn't have the appropriate masked load or"
1946 " store.\n");
1947 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1948 return;
1949 }
1950 /* We might load more scalars than we need for permuting SLP loads.
1951 We checked in get_group_load_store_type that the extra elements
1952 don't leak into a new vector. */
1953 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1954 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1955 unsigned int nvectors;
1956 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1957 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1958 else
1959 gcc_unreachable ();
1960}
1961
1962/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1963 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1964 that needs to be applied to all loads and stores in a vectorized loop.
1965 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1966
1967 MASK_TYPE is the type of both masks. If new statements are needed,
1968 insert them before GSI. */
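/* Illustration (example masks): with LOOP_MASK = { -1, -1, 0, 0 } and
   VEC_MASK = { -1, 0, -1, 0 }, a single statement
   vec_mask_and = VEC_MASK & LOOP_MASK is emitted before GSI and the
   result { -1, 0, 0, 0 } is returned.  */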
1969
1970static tree
1971prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1972 gimple_stmt_iterator *gsi)
1973{
1974 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1975 if (!loop_mask)
1976 return vec_mask;
1977
1978 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1979 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1980 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1981 vec_mask, loop_mask);
1982 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1983 return and_res;
1984}
1985
429ef523
RS
1986/* Determine whether we can use a gather load or scatter store to vectorize
1987 strided load or store STMT by truncating the current offset to a smaller
1988 width. We need to be able to construct an offset vector:
1989
1990 { 0, X, X*2, X*3, ... }
1991
1992 without loss of precision, where X is STMT's DR_STEP.
1993
1994 Return true if this is possible, describing the gather load or scatter
1995 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
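/* Worked example (numbers assumed): with DR_STEP = 4, 32-bit vector
   elements and a loop known to run at most a few hundred iterations,
   the largest needed offset easily fits in 32 bits, so the offset
   vector { 0, 4, 8, 12, ... } with scale 1 can be used without risking
   overflow; a variable DR_STEP, by contrast, is rejected up front.  */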
1996
1997static bool
1998vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1999 bool masked_p,
2000 gather_scatter_info *gs_info)
2001{
2002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2004 tree step = DR_STEP (dr);
2005 if (TREE_CODE (step) != INTEGER_CST)
2006 {
2007 /* ??? Perhaps we could use range information here? */
2008 if (dump_enabled_p ())
2009 dump_printf_loc (MSG_NOTE, vect_location,
2010 "cannot truncate variable step.\n");
2011 return false;
2012 }
2013
2014 /* Get the number of bits in an element. */
2015 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2016 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2017 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2018
2019 /* Set COUNT to the upper limit on the number of elements - 1.
2020 Start with the maximum vectorization factor. */
2021 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2022
2023 /* Try lowering COUNT to the number of scalar latch iterations. */
2024 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2025 widest_int max_iters;
2026 if (max_loop_iterations (loop, &max_iters)
2027 && max_iters < count)
2028 count = max_iters.to_shwi ();
2029
2030 /* Try scales of 1 and the element size. */
2031 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2032 bool overflow_p = false;
2033 for (int i = 0; i < 2; ++i)
2034 {
2035 int scale = scales[i];
2036 widest_int factor;
2037 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2038 continue;
2039
2040 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2041 in OFFSET_BITS bits. */
2042 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
2043 if (overflow_p)
2044 continue;
2045 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2046 if (wi::min_precision (range, sign) > element_bits)
2047 {
2048 overflow_p = true;
2049 continue;
2050 }
2051
2052 /* See whether the target supports the operation. */
2053 tree memory_type = TREE_TYPE (DR_REF (dr));
2054 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2055 memory_type, element_bits, sign, scale,
2056 &gs_info->ifn, &gs_info->element_type))
2057 continue;
2058
2059 tree offset_type = build_nonstandard_integer_type (element_bits,
2060 sign == UNSIGNED);
2061
2062 gs_info->decl = NULL_TREE;
2063 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2064 but we don't need to store that here. */
2065 gs_info->base = NULL_TREE;
2066 gs_info->offset = fold_convert (offset_type, step);
929b4411 2067 gs_info->offset_dt = vect_constant_def;
429ef523
RS
2068 gs_info->offset_vectype = NULL_TREE;
2069 gs_info->scale = scale;
2070 gs_info->memory_type = memory_type;
2071 return true;
2072 }
2073
2074 if (overflow_p && dump_enabled_p ())
2075 dump_printf_loc (MSG_NOTE, vect_location,
2076 "truncating gather/scatter offset to %d bits"
2077 " might change its value.\n", element_bits);
2078
2079 return false;
2080}
2081
ab2fc782
RS
2082/* Return true if we can use gather/scatter internal functions to
2083 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
2084 MASKED_P is true if load or store is conditional. When returning
2085 true, fill in GS_INFO with the information required to perform the
2086 operation. */
ab2fc782
RS
2087
2088static bool
2089vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 2090 bool masked_p,
ab2fc782
RS
2091 gather_scatter_info *gs_info)
2092{
2093 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2094 || gs_info->decl)
429ef523
RS
2095 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2096 masked_p, gs_info);
ab2fc782
RS
2097
2098 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2099 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2100 tree offset_type = TREE_TYPE (gs_info->offset);
2101 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2102
2103 /* Enforced by vect_check_gather_scatter. */
2104 gcc_assert (element_bits >= offset_bits);
2105
2106 /* If the elements are wider than the offset, convert the offset to the
2107 same width, without changing its sign. */
2108 if (element_bits > offset_bits)
2109 {
2110 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2111 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2112 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2113 }
2114
2115 if (dump_enabled_p ())
2116 dump_printf_loc (MSG_NOTE, vect_location,
2117 "using gather/scatter for strided/grouped access,"
2118 " scale = %d\n", gs_info->scale);
2119
2120 return true;
2121}
2122
62da9e14
RS
2123/* STMT is a non-strided load or store, meaning that it accesses
2124 elements with a known constant step. Return -1 if that step
2125 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2126
2127static int
2128compare_step_with_zero (gimple *stmt)
2129{
2130 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
2131 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2132 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2133 size_zero_node);
62da9e14
RS
2134}
2135
2136/* If the target supports a permute mask that reverses the elements in
2137 a vector of type VECTYPE, return that mask, otherwise return null. */
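/* For example, with a four-element vector the permutation selects
   { 3, 2, 1, 0 }.  The builder below encodes just the single stepped
   pattern { NUNITS - 1, NUNITS - 2, NUNITS - 3 }, which also covers
   variable-length vectors.  */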
2138
2139static tree
2140perm_mask_for_reverse (tree vectype)
2141{
928686b1 2142 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2143
d980067b
RS
2144 /* The encoding has a single stepped pattern. */
2145 vec_perm_builder sel (nunits, 1, 3);
928686b1 2146 for (int i = 0; i < 3; ++i)
908a1a16 2147 sel.quick_push (nunits - 1 - i);
62da9e14 2148
e3342de4
RS
2149 vec_perm_indices indices (sel, 1, nunits);
2150 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2151 return NULL_TREE;
e3342de4 2152 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2153}
5ce9450f 2154
c3a8f964
RS
2155/* STMT is either a masked or unconditional store. Return the value
2156 being stored. */
2157
f307441a 2158tree
c3a8f964
RS
2159vect_get_store_rhs (gimple *stmt)
2160{
2161 if (gassign *assign = dyn_cast <gassign *> (stmt))
2162 {
2163 gcc_assert (gimple_assign_single_p (assign));
2164 return gimple_assign_rhs1 (assign);
2165 }
2166 if (gcall *call = dyn_cast <gcall *> (stmt))
2167 {
2168 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2169 int index = internal_fn_stored_value_index (ifn);
2170 gcc_assert (index >= 0);
2171 return gimple_call_arg (stmt, index);
c3a8f964
RS
2172 }
2173 gcc_unreachable ();
2174}
2175
2de001ee
RS
2176/* A subroutine of get_load_store_type, with a subset of the same
2177 arguments. Handle the case where STMT is part of a grouped load
2178 or store.
2179
2180 For stores, the statements in the group are all consecutive
2181 and there is no gap at the end. For loads, the statements in the
2182 group might not be consecutive; there can be gaps between statements
2183 as well as at the end. */
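/* Illustrative case (layout assumed): when a loop loads only some of
   the fields of a structure, the group has a gap at the end, so a
   full-vector access may read past the last group member.  That overrun
   is tolerated only for loads, and only when a scalar epilogue
   iteration can be peeled or the trailing elements stay within the
   known alignment boundary, as checked below.  */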
2184
2185static bool
2186get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2187 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2188 vect_memory_access_type *memory_access_type,
2189 gather_scatter_info *gs_info)
2de001ee
RS
2190{
2191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2192 vec_info *vinfo = stmt_info->vinfo;
2193 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2194 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2c53b149 2195 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 2196 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 2197 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2de001ee 2198 bool single_element_p = (stmt == first_stmt
2c53b149
RB
2199 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2200 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 2201 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2202
2203 /* True if the vectorized statements would access beyond the last
2204 statement in the group. */
2205 bool overrun_p = false;
2206
2207 /* True if we can cope with such overrun by peeling for gaps, so that
2208 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2209 bool can_overrun_p = (!masked_p
2210 && vls_type == VLS_LOAD
2211 && loop_vinfo
2212 && !loop->inner);
2de001ee
RS
2213
2214 /* There can only be a gap at the end of the group if the stride is
2215 known at compile time. */
2216 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2217
2218 /* Stores can't yet have gaps. */
2219 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2220
2221 if (slp)
2222 {
2223 if (STMT_VINFO_STRIDED_P (stmt_info))
2224 {
2c53b149 2225 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2226 separated by the stride, until we have a complete vector.
2227 Fall back to scalar accesses if that isn't possible. */
928686b1 2228 if (multiple_p (nunits, group_size))
2de001ee
RS
2229 *memory_access_type = VMAT_STRIDED_SLP;
2230 else
2231 *memory_access_type = VMAT_ELEMENTWISE;
2232 }
2233 else
2234 {
2235 overrun_p = loop_vinfo && gap != 0;
2236 if (overrun_p && vls_type != VLS_LOAD)
2237 {
2238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2239 "Grouped store with gaps requires"
2240 " non-consecutive accesses\n");
2241 return false;
2242 }
f702e7d4
RS
2243 /* An overrun is fine if the trailing elements are smaller
2244 than the alignment boundary B. Every vector access will
2245 be a multiple of B and so we are guaranteed to access a
2246 non-gap element in the same B-sized block. */
f9ef2c76 2247 if (overrun_p
f702e7d4
RS
2248 && gap < (vect_known_alignment_in_bytes (first_dr)
2249 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2250 overrun_p = false;
2de001ee
RS
2251 if (overrun_p && !can_overrun_p)
2252 {
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "Peeling for outer loop is not supported\n");
2256 return false;
2257 }
2258 *memory_access_type = VMAT_CONTIGUOUS;
2259 }
2260 }
2261 else
2262 {
2263 /* We can always handle this case using elementwise accesses,
2264 but see if something more efficient is available. */
2265 *memory_access_type = VMAT_ELEMENTWISE;
2266
2267 /* If there is a gap at the end of the group then these optimizations
2268 would access excess elements in the last iteration. */
2269 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2270 /* An overrun is fine if the trailing elements are smaller than the
2271 alignment boundary B. Every vector access will be a multiple of B
2272 and so we are guaranteed to access a non-gap element in the
2273 same B-sized block. */
f9ef2c76 2274 if (would_overrun_p
7e11fc7f 2275 && !masked_p
f702e7d4
RS
2276 && gap < (vect_known_alignment_in_bytes (first_dr)
2277 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2278 would_overrun_p = false;
f702e7d4 2279
2de001ee 2280 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2281 && (can_overrun_p || !would_overrun_p)
2282 && compare_step_with_zero (stmt) > 0)
2de001ee 2283 {
6737facb
RS
2284 /* First cope with the degenerate case of a single-element
2285 vector. */
2286 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2287 *memory_access_type = VMAT_CONTIGUOUS;
2288
2289 /* Otherwise try using LOAD/STORE_LANES. */
2290 if (*memory_access_type == VMAT_ELEMENTWISE
2291 && (vls_type == VLS_LOAD
7e11fc7f
RS
2292 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2293 : vect_store_lanes_supported (vectype, group_size,
2294 masked_p)))
2de001ee
RS
2295 {
2296 *memory_access_type = VMAT_LOAD_STORE_LANES;
2297 overrun_p = would_overrun_p;
2298 }
2299
2300 /* If that fails, try using permuting loads. */
2301 if (*memory_access_type == VMAT_ELEMENTWISE
2302 && (vls_type == VLS_LOAD
2303 ? vect_grouped_load_supported (vectype, single_element_p,
2304 group_size)
2305 : vect_grouped_store_supported (vectype, group_size)))
2306 {
2307 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2308 overrun_p = would_overrun_p;
2309 }
2310 }
429ef523
RS
2311
 2312 /* As a last resort, try using a gather load or scatter store.
2313
2314 ??? Although the code can handle all group sizes correctly,
2315 it probably isn't a win to use separate strided accesses based
2316 on nearby locations. Or, even if it's a win over scalar code,
2317 it might not be a win over vectorizing at a lower VF, if that
2318 allows us to use contiguous accesses. */
2319 if (*memory_access_type == VMAT_ELEMENTWISE
2320 && single_element_p
2321 && loop_vinfo
2322 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2323 masked_p, gs_info))
2324 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2325 }
2326
2327 if (vls_type != VLS_LOAD && first_stmt == stmt)
2328 {
2329 /* STMT is the leader of the group. Check the operands of all the
2330 stmts of the group. */
2c53b149 2331 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2de001ee
RS
2332 while (next_stmt)
2333 {
7e11fc7f 2334 tree op = vect_get_store_rhs (next_stmt);
2de001ee 2335 enum vect_def_type dt;
894dd753 2336 if (!vect_is_simple_use (op, vinfo, &dt))
2de001ee
RS
2337 {
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2340 "use not simple.\n");
2341 return false;
2342 }
2c53b149 2343 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2de001ee
RS
2344 }
2345 }
2346
2347 if (overrun_p)
2348 {
2349 gcc_assert (can_overrun_p);
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 "Data access with gaps requires scalar "
2353 "epilogue loop\n");
2354 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2355 }
2356
2357 return true;
2358}
2359
62da9e14
RS
2360/* A subroutine of get_load_store_type, with a subset of the same
2361 arguments. Handle the case where STMT is a load or store that
2362 accesses consecutive elements with a negative step. */
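/* A sketch of the decision (values assumed): a single-copy load of
   32-bit elements with DR_STEP = -4 becomes VMAT_CONTIGUOUS_REVERSE
   when the access is sufficiently aligned and perm_mask_for_reverse
   finds a reversing permutation; storing an invariant value needs no
   permute (VMAT_CONTIGUOUS_DOWN), and everything else falls back to
   VMAT_ELEMENTWISE.  */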
2363
2364static vect_memory_access_type
2365get_negative_load_store_type (gimple *stmt, tree vectype,
2366 vec_load_store_type vls_type,
2367 unsigned int ncopies)
2368{
2369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2370 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2371 dr_alignment_support alignment_support_scheme;
2372
2373 if (ncopies > 1)
2374 {
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "multiple types with negative step.\n");
2378 return VMAT_ELEMENTWISE;
2379 }
2380
2381 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2382 if (alignment_support_scheme != dr_aligned
2383 && alignment_support_scheme != dr_unaligned_supported)
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "negative step but alignment required.\n");
2388 return VMAT_ELEMENTWISE;
2389 }
2390
2391 if (vls_type == VLS_STORE_INVARIANT)
2392 {
2393 if (dump_enabled_p ())
2394 dump_printf_loc (MSG_NOTE, vect_location,
2395 "negative step with invariant source;"
2396 " no permute needed.\n");
2397 return VMAT_CONTIGUOUS_DOWN;
2398 }
2399
2400 if (!perm_mask_for_reverse (vectype))
2401 {
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2404 "negative step and reversing not supported.\n");
2405 return VMAT_ELEMENTWISE;
2406 }
2407
2408 return VMAT_CONTIGUOUS_REVERSE;
2409}
2410
2de001ee
RS
2411/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2412 if there is a memory access type that the vectorized form can use,
2413 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2414 or scatters, fill in GS_INFO accordingly.
2415
2416 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2417 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2418 VECTYPE is the vector type that the vectorized statements will use.
2419 NCOPIES is the number of vector statements that will be needed. */
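/* In outline: gather/scatter accesses keep VMAT_GATHER_SCATTER, grouped
   accesses are classified by get_group_load_store_type, other strided
   accesses become VMAT_ELEMENTWISE (or gather/scatter when that looks
   usable), and the remaining cases are decided by the sign of the step:
   negative, zero (invariant load) or positive (contiguous).  */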
2de001ee
RS
2420
2421static bool
7e11fc7f 2422get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2423 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2424 vect_memory_access_type *memory_access_type,
2425 gather_scatter_info *gs_info)
2426{
2427 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2428 vec_info *vinfo = stmt_info->vinfo;
2429 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2430 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2431 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2432 {
2433 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2434 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2435 gcc_unreachable ();
894dd753 2436 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2de001ee
RS
2437 &gs_info->offset_dt,
2438 &gs_info->offset_vectype))
2439 {
2440 if (dump_enabled_p ())
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2442 "%s index use not simple.\n",
2443 vls_type == VLS_LOAD ? "gather" : "scatter");
2444 return false;
2445 }
2446 }
2447 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2448 {
7e11fc7f 2449 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2450 memory_access_type, gs_info))
2de001ee
RS
2451 return false;
2452 }
2453 else if (STMT_VINFO_STRIDED_P (stmt_info))
2454 {
2455 gcc_assert (!slp);
ab2fc782 2456 if (loop_vinfo
429ef523
RS
2457 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2458 masked_p, gs_info))
ab2fc782
RS
2459 *memory_access_type = VMAT_GATHER_SCATTER;
2460 else
2461 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2462 }
2463 else
62da9e14
RS
2464 {
2465 int cmp = compare_step_with_zero (stmt);
2466 if (cmp < 0)
2467 *memory_access_type = get_negative_load_store_type
2468 (stmt, vectype, vls_type, ncopies);
2469 else if (cmp == 0)
2470 {
2471 gcc_assert (vls_type == VLS_LOAD);
2472 *memory_access_type = VMAT_INVARIANT;
2473 }
2474 else
2475 *memory_access_type = VMAT_CONTIGUOUS;
2476 }
2de001ee 2477
4d694b27
RS
2478 if ((*memory_access_type == VMAT_ELEMENTWISE
2479 || *memory_access_type == VMAT_STRIDED_SLP)
2480 && !nunits.is_constant ())
2481 {
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2484 "Not using elementwise accesses due to variable "
2485 "vectorization factor.\n");
2486 return false;
2487 }
2488
2de001ee
RS
2489 /* FIXME: At the moment the cost model seems to underestimate the
2490 cost of using elementwise accesses. This check preserves the
2491 traditional behavior until that can be fixed. */
2492 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8 2493 && !STMT_VINFO_STRIDED_P (stmt_info)
2c53b149
RB
2494 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2495 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2496 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2497 {
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2500 "not falling back to elementwise accesses\n");
2501 return false;
2502 }
2503 return true;
2504}
2505
aaeefd88 2506/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2507 conditional load or store STMT. When returning true, store the type
2508 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2509 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2510
2511static bool
929b4411
RS
2512vect_check_load_store_mask (gimple *stmt, tree mask,
2513 vect_def_type *mask_dt_out,
2514 tree *mask_vectype_out)
aaeefd88
RS
2515{
2516 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2517 {
2518 if (dump_enabled_p ())
2519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2520 "mask argument is not a boolean.\n");
2521 return false;
2522 }
2523
2524 if (TREE_CODE (mask) != SSA_NAME)
2525 {
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "mask argument is not an SSA name.\n");
2529 return false;
2530 }
2531
2532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2533 enum vect_def_type mask_dt;
aaeefd88 2534 tree mask_vectype;
894dd753 2535 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
aaeefd88
RS
2536 {
2537 if (dump_enabled_p ())
2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2539 "mask use not simple.\n");
2540 return false;
2541 }
2542
2543 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2544 if (!mask_vectype)
2545 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2546
2547 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2548 {
2549 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "could not find an appropriate vector mask type.\n");
2552 return false;
2553 }
2554
2555 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2556 TYPE_VECTOR_SUBPARTS (vectype)))
2557 {
2558 if (dump_enabled_p ())
2559 {
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 "vector mask type ");
2562 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2563 dump_printf (MSG_MISSED_OPTIMIZATION,
2564 " does not match vector data type ");
2565 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2566 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2567 }
2568 return false;
2569 }
2570
929b4411 2571 *mask_dt_out = mask_dt;
aaeefd88
RS
2572 *mask_vectype_out = mask_vectype;
2573 return true;
2574}
2575
3133c3b6
RS
2576/* Return true if stored value RHS is suitable for vectorizing store
2577 statement STMT. When returning true, store the type of the
929b4411
RS
2578 definition in *RHS_DT_OUT, the type of the vectorized store value in
2579 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2580
2581static bool
929b4411
RS
2582vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2583 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2584{
2585 /* In the case this is a store from a constant make sure
2586 native_encode_expr can handle it. */
2587 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2588 {
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "cannot encode constant as a byte sequence.\n");
2592 return false;
2593 }
2594
2595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2596 enum vect_def_type rhs_dt;
3133c3b6 2597 tree rhs_vectype;
894dd753 2598 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
3133c3b6
RS
2599 {
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 "use not simple.\n");
2603 return false;
2604 }
2605
2606 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2607 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2608 {
2609 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2611 "incompatible vector types.\n");
2612 return false;
2613 }
2614
929b4411 2615 *rhs_dt_out = rhs_dt;
3133c3b6 2616 *rhs_vectype_out = rhs_vectype;
929b4411 2617 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2618 *vls_type_out = VLS_STORE_INVARIANT;
2619 else
2620 *vls_type_out = VLS_STORE;
2621 return true;
2622}
2623
bc9587eb
RS
2624/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2625 Note that we support masks with floating-point type, in which case the
2626 floats are interpreted as a bitmask. */
2627
2628static tree
2629vect_build_all_ones_mask (gimple *stmt, tree masktype)
2630{
2631 if (TREE_CODE (masktype) == INTEGER_TYPE)
2632 return build_int_cst (masktype, -1);
2633 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2634 {
2635 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2636 mask = build_vector_from_val (masktype, mask);
2637 return vect_init_vector (stmt, mask, masktype, NULL);
2638 }
2639 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2640 {
2641 REAL_VALUE_TYPE r;
2642 long tmp[6];
2643 for (int j = 0; j < 6; ++j)
2644 tmp[j] = -1;
2645 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2646 tree mask = build_real (TREE_TYPE (masktype), r);
2647 mask = build_vector_from_val (masktype, mask);
2648 return vect_init_vector (stmt, mask, masktype, NULL);
2649 }
2650 gcc_unreachable ();
2651}
2652
2653/* Build an all-zero merge value of type VECTYPE while vectorizing
2654 STMT as a gather load. */
2655
2656static tree
2657vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2658{
2659 tree merge;
2660 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2661 merge = build_int_cst (TREE_TYPE (vectype), 0);
2662 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2663 {
2664 REAL_VALUE_TYPE r;
2665 long tmp[6];
2666 for (int j = 0; j < 6; ++j)
2667 tmp[j] = 0;
2668 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2669 merge = build_real (TREE_TYPE (vectype), r);
2670 }
2671 else
2672 gcc_unreachable ();
2673 merge = build_vector_from_val (vectype, merge);
2674 return vect_init_vector (stmt, merge, vectype, NULL);
2675}
2676
c48d2d35
RS
2677/* Build a gather load call while vectorizing STMT. Insert new instructions
2678 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2679 operation. If the load is conditional, MASK is the unvectorized
929b4411 2680 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2681
2682static void
2683vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2684 gimple **vec_stmt, gather_scatter_info *gs_info,
929b4411 2685 tree mask, vect_def_type mask_dt)
c48d2d35
RS
2686{
2687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2688 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2689 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2691 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2692 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2693 edge pe = loop_preheader_edge (loop);
2694 enum { NARROW, NONE, WIDEN } modifier;
2695 poly_uint64 gather_off_nunits
2696 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2697
2698 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2699 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2700 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2701 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2702 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2703 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2704 tree scaletype = TREE_VALUE (arglist);
2705 gcc_checking_assert (types_compatible_p (srctype, rettype)
2706 && (!mask || types_compatible_p (srctype, masktype)));
2707
2708 tree perm_mask = NULL_TREE;
2709 tree mask_perm_mask = NULL_TREE;
2710 if (known_eq (nunits, gather_off_nunits))
2711 modifier = NONE;
2712 else if (known_eq (nunits * 2, gather_off_nunits))
2713 {
2714 modifier = WIDEN;
2715
2716 /* Currently widening gathers and scatters are only supported for
2717 fixed-length vectors. */
2718 int count = gather_off_nunits.to_constant ();
2719 vec_perm_builder sel (count, count, 1);
2720 for (int i = 0; i < count; ++i)
2721 sel.quick_push (i | (count / 2));
2722
2723 vec_perm_indices indices (sel, 1, count);
2724 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2725 indices);
2726 }
2727 else if (known_eq (nunits, gather_off_nunits * 2))
2728 {
2729 modifier = NARROW;
2730
2731 /* Currently narrowing gathers and scatters are only supported for
2732 fixed-length vectors. */
2733 int count = nunits.to_constant ();
2734 vec_perm_builder sel (count, count, 1);
2735 sel.quick_grow (count);
2736 for (int i = 0; i < count; ++i)
2737 sel[i] = i < count / 2 ? i : i + count / 2;
2738 vec_perm_indices indices (sel, 2, count);
2739 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2740
2741 ncopies *= 2;
2742
2743 if (mask)
2744 {
2745 for (int i = 0; i < count; ++i)
2746 sel[i] = i | (count / 2);
2747 indices.new_vector (sel, 2, count);
2748 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2749 }
2750 }
2751 else
2752 gcc_unreachable ();
2753
2754 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2755 vectype);
2756
2757 tree ptr = fold_convert (ptrtype, gs_info->base);
2758 if (!is_gimple_min_invariant (ptr))
2759 {
2760 gimple_seq seq;
2761 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2762 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2763 gcc_assert (!new_bb);
2764 }
2765
2766 tree scale = build_int_cst (scaletype, gs_info->scale);
2767
2768 tree vec_oprnd0 = NULL_TREE;
2769 tree vec_mask = NULL_TREE;
2770 tree src_op = NULL_TREE;
2771 tree mask_op = NULL_TREE;
2772 tree prev_res = NULL_TREE;
2773 stmt_vec_info prev_stmt_info = NULL;
2774
2775 if (!mask)
2776 {
2777 src_op = vect_build_zero_merge_argument (stmt, rettype);
2778 mask_op = vect_build_all_ones_mask (stmt, masktype);
2779 }
2780
2781 for (int j = 0; j < ncopies; ++j)
2782 {
2783 tree op, var;
2784 gimple *new_stmt;
2785 if (modifier == WIDEN && (j & 1))
2786 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2787 perm_mask, stmt, gsi);
2788 else if (j == 0)
2789 op = vec_oprnd0
2790 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2791 else
2792 op = vec_oprnd0
2793 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2794
2795 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2796 {
2797 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2798 TYPE_VECTOR_SUBPARTS (idxtype)));
2799 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2800 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2801 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2803 op = var;
2804 }
2805
2806 if (mask)
2807 {
2808 if (mask_perm_mask && (j & 1))
2809 mask_op = permute_vec_elements (mask_op, mask_op,
2810 mask_perm_mask, stmt, gsi);
2811 else
2812 {
2813 if (j == 0)
2814 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2815 else
929b4411 2816 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2817
2818 mask_op = vec_mask;
2819 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2820 {
2821 gcc_assert
2822 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2823 TYPE_VECTOR_SUBPARTS (masktype)));
2824 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2825 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2826 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2827 mask_op);
2828 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2829 mask_op = var;
2830 }
2831 }
2832 src_op = mask_op;
2833 }
2834
2835 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2836 mask_op, scale);
2837
2838 if (!useless_type_conversion_p (vectype, rettype))
2839 {
2840 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2841 TYPE_VECTOR_SUBPARTS (rettype)));
2842 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2843 gimple_call_set_lhs (new_stmt, op);
2844 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2845 var = make_ssa_name (vec_dest);
2846 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2847 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2848 }
2849 else
2850 {
2851 var = make_ssa_name (vec_dest, new_stmt);
2852 gimple_call_set_lhs (new_stmt, var);
2853 }
2854
2855 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2856
2857 if (modifier == NARROW)
2858 {
2859 if ((j & 1) == 0)
2860 {
2861 prev_res = var;
2862 continue;
2863 }
2864 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2865 new_stmt = SSA_NAME_DEF_STMT (var);
2866 }
2867
2868 if (prev_stmt_info == NULL)
2869 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2870 else
2871 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2872 prev_stmt_info = vinfo_for_stmt (new_stmt);
2873 }
2874}
2875
bfaa08b7
RS
2876/* Prepare the base and offset in GS_INFO for vectorization.
2877 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2878 to the vectorized offset argument for the first copy of STMT. STMT
2879 is the statement described by GS_INFO and LOOP is the containing loop. */
2880
2881static void
2882vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2883 gather_scatter_info *gs_info,
2884 tree *dataref_ptr, tree *vec_offset)
2885{
2886 gimple_seq stmts = NULL;
2887 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2888 if (stmts != NULL)
2889 {
2890 basic_block new_bb;
2891 edge pe = loop_preheader_edge (loop);
2892 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2893 gcc_assert (!new_bb);
2894 }
2895 tree offset_type = TREE_TYPE (gs_info->offset);
2896 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2897 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2898 offset_vectype);
2899}
2900
ab2fc782
RS
2901/* Prepare to implement a grouped or strided load or store using
2902 the gather load or scatter store operation described by GS_INFO.
2903 STMT is the load or store statement.
2904
2905 Set *DATAREF_BUMP to the amount that should be added to the base
2906 address after each copy of the vectorized statement. Set *VEC_OFFSET
2907 to an invariant offset vector in which element I has the value
2908 I * DR_STEP / SCALE. */
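/* Worked example (values assumed): with DR_STEP = 20 bytes, SCALE = 4
   and a 4-element vector, DATAREF_BUMP is 20 * 4 = 80 and VEC_OFFSET is
   the invariant series { 0, 5, 10, 15 }, built with VEC_SERIES_EXPR
   below.  */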
2909
2910static void
2911vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2912 gather_scatter_info *gs_info,
2913 tree *dataref_bump, tree *vec_offset)
2914{
2915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2916 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2917 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2918 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2919 gimple_seq stmts;
2920
2921 tree bump = size_binop (MULT_EXPR,
2922 fold_convert (sizetype, DR_STEP (dr)),
2923 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2924 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2925 if (stmts)
2926 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2927
2928 /* The offset given in GS_INFO can have pointer type, so use the element
2929 type of the vector instead. */
2930 tree offset_type = TREE_TYPE (gs_info->offset);
2931 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2932 offset_type = TREE_TYPE (offset_vectype);
2933
2934 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2935 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2936 ssize_int (gs_info->scale));
2937 step = fold_convert (offset_type, step);
2938 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2939
2940 /* Create {0, X, X*2, X*3, ...}. */
2941 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2942 build_zero_cst (offset_type), step);
2943 if (stmts)
2944 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2945}
2946
2947/* Return the amount that should be added to a vector pointer to move
2948 to the next or previous copy of AGGR_TYPE. DR is the data reference
2949 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2950 vectorization. */
2951
2952static tree
2953vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2954 vect_memory_access_type memory_access_type)
2955{
2956 if (memory_access_type == VMAT_INVARIANT)
2957 return size_zero_node;
2958
2959 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2960 tree step = vect_dr_behavior (dr)->step;
2961 if (tree_int_cst_sgn (step) == -1)
2962 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2963 return iv_step;
2964}
2965
37b14185
RB
2966/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2967
2968static bool
2969vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2970 gimple **vec_stmt, slp_tree slp_node,
68435eb2
RB
2971 tree vectype_in, enum vect_def_type *dt,
2972 stmt_vector_for_cost *cost_vec)
37b14185
RB
2973{
2974 tree op, vectype;
2975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2976 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2977 unsigned ncopies;
2978 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2979
2980 op = gimple_call_arg (stmt, 0);
2981 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2982
2983 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2984 return false;
37b14185
RB
2985
2986 /* Multiple types in SLP are handled by creating the appropriate number of
2987 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2988 case of SLP. */
2989 if (slp_node)
2990 ncopies = 1;
2991 else
e8f142e2 2992 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2993
2994 gcc_assert (ncopies >= 1);
2995
2996 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2997 if (! char_vectype)
2998 return false;
2999
928686b1
RS
3000 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3001 return false;
3002
794e3180 3003 unsigned word_bytes = num_bytes / nunits;
908a1a16 3004
d980067b
RS
3005 /* The encoding uses one stepped pattern for each byte in the word. */
3006 vec_perm_builder elts (num_bytes, word_bytes, 3);
3007 for (unsigned i = 0; i < 3; ++i)
37b14185 3008 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 3009 elts.quick_push ((i + 1) * word_bytes - j - 1);
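  /* E.g. for a 32-bit bswap on 16-byte vectors (word_bytes == 4,
     num_bytes == 16) the selector expands to
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
     i.e. a byte reversal within each word.  */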
37b14185 3010
e3342de4
RS
3011 vec_perm_indices indices (elts, 1, num_bytes);
3012 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
3013 return false;
3014
3015 if (! vec_stmt)
3016 {
3017 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3018 DUMP_VECT_SCOPE ("vectorizable_bswap");
78604de0 3019 if (! slp_node)
37b14185 3020 {
68435eb2
RB
3021 record_stmt_cost (cost_vec,
3022 1, vector_stmt, stmt_info, 0, vect_prologue);
3023 record_stmt_cost (cost_vec,
3024 ncopies, vec_perm, stmt_info, 0, vect_body);
37b14185
RB
3025 }
3026 return true;
3027 }
3028
736d0f28 3029 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
3030
3031 /* Transform. */
3032 vec<tree> vec_oprnds = vNULL;
3033 gimple *new_stmt = NULL;
3034 stmt_vec_info prev_stmt_info = NULL;
3035 for (unsigned j = 0; j < ncopies; j++)
3036 {
3037 /* Handle uses. */
3038 if (j == 0)
306b0c92 3039 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
3040 else
3041 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3042
 3043 /* Arguments are ready.  Create the new vector stmt. */
3044 unsigned i;
3045 tree vop;
3046 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3047 {
3048 tree tem = make_ssa_name (char_vectype);
3049 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3050 char_vectype, vop));
3051 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3052 tree tem2 = make_ssa_name (char_vectype);
3053 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3054 tem, tem, bswap_vconst);
3055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3056 tem = make_ssa_name (vectype);
3057 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3058 vectype, tem2));
3059 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3060 if (slp_node)
3061 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3062 }
3063
3064 if (slp_node)
3065 continue;
3066
3067 if (j == 0)
3068 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3069 else
3070 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3071
3072 prev_stmt_info = vinfo_for_stmt (new_stmt);
3073 }
3074
3075 vec_oprnds.release ();
3076 return true;
3077}
3078
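vectorizable_bswap above lowers a call to __builtin_bswap{16,32,64} on vector data to a single VEC_PERM_EXPR over the same bits viewed as a vector of chars: the selector reverses the bytes within each word while leaving the words in place. A minimal standalone sketch of that selector (not part of this file; the 16-byte/32-bit shape is an assumption for illustration), expanding the same index formula (i + 1) * word_bytes - j - 1 used in the loop above:

#include <stdio.h>

int
main (void)
{
  const unsigned num_bytes = 16;   /* bytes in the char-vector view */
  const unsigned word_bytes = 4;   /* bytes per element, i.e. bswap32 */

  /* The vectorizer encodes this selector compactly with three stepped
     patterns (vec_perm_builder (num_bytes, word_bytes, 3)); printed here
     fully expanded it is 3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12.  */
  for (unsigned i = 0; i < num_bytes / word_bytes; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      printf ("%u ", (i + 1) * word_bytes - j - 1);
  printf ("\n");
  return 0;
}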
b1b6836e
RS
3079/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3080 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3081 in a single step. On success, store the binary pack code in
3082 *CONVERT_CODE. */
3083
3084static bool
3085simple_integer_narrowing (tree vectype_out, tree vectype_in,
3086 tree_code *convert_code)
3087{
3088 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3089 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3090 return false;
3091
3092 tree_code code;
3093 int multi_step_cvt = 0;
3094 auto_vec <tree, 8> interm_types;
3095 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3096 &code, &multi_step_cvt,
3097 &interm_types)
3098 || multi_step_cvt)
3099 return false;
3100
3101 *convert_code = code;
3102 return true;
3103}
5ce9450f 3104
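simple_integer_narrowing above checks whether the integer elements of VECTYPE_IN can be packed down to the elements of VECTYPE_OUT in exactly one step, with no intermediate types; vectorizable_call uses it to accept calls whose vectorized result is produced in a wider element type. A rough scalar picture of a single narrowing step (an illustrative sketch with assumed 32-bit to 16-bit data, not GCC code):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* Two vectors of four 32-bit results ...  */
  int32_t a[4] = { 1, 2, 3, 4 }, b[4] = { 5, 6, 7, 8 };
  /* ... packed by one truncating step into a single vector of eight 16-bit
     values.  Narrowing 64-bit results to 16 bits would need intermediate
     types (multi_step_cvt != 0), so simple_integer_narrowing rejects it.  */
  int16_t packed[8];

  for (int i = 0; i < 4; i++)
    {
      packed[i] = (int16_t) a[i];
      packed[i + 4] = (int16_t) b[i];   /* lane order chosen arbitrarily */
    }
  for (int i = 0; i < 8; i++)
    printf ("%d ", packed[i]);
  printf ("\n");
  return 0;
}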
ebfd146a
IR
3105/* Function vectorizable_call.
3106
538dd0b7 3107 Check if GS performs a function call that can be vectorized.
b8698a0f 3108 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
 3109 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3110 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3111
3112static bool
355fe088 3113vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 3114 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 3115{
538dd0b7 3116 gcall *stmt;
ebfd146a
IR
3117 tree vec_dest;
3118 tree scalar_dest;
3119 tree op, type;
3120 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3121 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3122 tree vectype_out, vectype_in;
c7bda0f4
RS
3123 poly_uint64 nunits_in;
3124 poly_uint64 nunits_out;
ebfd146a 3125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3126 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3127 vec_info *vinfo = stmt_info->vinfo;
81c40241 3128 tree fndecl, new_temp, rhs_type;
0502fb85
UB
3129 enum vect_def_type dt[3]
3130 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 3131 int ndts = 3;
355fe088 3132 gimple *new_stmt = NULL;
ebfd146a 3133 int ncopies, j;
6e1aa848 3134 vec<tree> vargs = vNULL;
ebfd146a
IR
3135 enum { NARROW, NONE, WIDEN } modifier;
3136 size_t i, nargs;
9d5e7640 3137 tree lhs;
ebfd146a 3138
190c2236 3139 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3140 return false;
3141
66c16fd9
RB
3142 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3143 && ! vec_stmt)
ebfd146a
IR
3144 return false;
3145
538dd0b7
DM
3146 /* Is GS a vectorizable call? */
3147 stmt = dyn_cast <gcall *> (gs);
3148 if (!stmt)
ebfd146a
IR
3149 return false;
3150
5ce9450f 3151 if (gimple_call_internal_p (stmt)
bfaa08b7 3152 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3153 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
c3a8f964
RS
3154 /* Handled by vectorizable_load and vectorizable_store. */
3155 return false;
5ce9450f 3156
0136f8f0
AH
3157 if (gimple_call_lhs (stmt) == NULL_TREE
3158 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
3159 return false;
3160
0136f8f0 3161 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3162
b690cc0f
RG
3163 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3164
ebfd146a
IR
3165 /* Process function arguments. */
3166 rhs_type = NULL_TREE;
b690cc0f 3167 vectype_in = NULL_TREE;
ebfd146a
IR
3168 nargs = gimple_call_num_args (stmt);
3169
1b1562a5
MM
 3170 /* Bail out if the function has more than three arguments; we do not have
 3171 interesting builtin functions to vectorize with more than two arguments,
 3172 except for fma. A call with no arguments is not interesting either. */
3173 if (nargs == 0 || nargs > 3)
ebfd146a
IR
3174 return false;
3175
74bf76ed
JJ
3176 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3177 if (gimple_call_internal_p (stmt)
3178 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3179 {
3180 nargs = 0;
3181 rhs_type = unsigned_type_node;
3182 }
3183
ebfd146a
IR
3184 for (i = 0; i < nargs; i++)
3185 {
b690cc0f
RG
3186 tree opvectype;
3187
ebfd146a
IR
3188 op = gimple_call_arg (stmt, i);
3189
3190 /* We can only handle calls with arguments of the same type. */
3191 if (rhs_type
8533c9d8 3192 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3193 {
73fbfcad 3194 if (dump_enabled_p ())
78c60e3d 3195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3196 "argument types differ.\n");
ebfd146a
IR
3197 return false;
3198 }
b690cc0f
RG
3199 if (!rhs_type)
3200 rhs_type = TREE_TYPE (op);
ebfd146a 3201
894dd753 3202 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
ebfd146a 3203 {
73fbfcad 3204 if (dump_enabled_p ())
78c60e3d 3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3206 "use not simple.\n");
ebfd146a
IR
3207 return false;
3208 }
ebfd146a 3209
b690cc0f
RG
3210 if (!vectype_in)
3211 vectype_in = opvectype;
3212 else if (opvectype
3213 && opvectype != vectype_in)
3214 {
73fbfcad 3215 if (dump_enabled_p ())
78c60e3d 3216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3217 "argument vector types differ.\n");
b690cc0f
RG
3218 return false;
3219 }
3220 }
3221 /* If all arguments are external or constant defs use a vector type with
3222 the same size as the output vector type. */
ebfd146a 3223 if (!vectype_in)
b690cc0f 3224 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3225 if (vec_stmt)
3226 gcc_assert (vectype_in);
3227 if (!vectype_in)
3228 {
73fbfcad 3229 if (dump_enabled_p ())
7d8930a0 3230 {
78c60e3d
SS
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3232 "no vectype for scalar type ");
3233 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3234 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3235 }
3236
3237 return false;
3238 }
ebfd146a
IR
3239
3240 /* FORNOW */
b690cc0f
RG
3241 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3242 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3243 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3244 modifier = NARROW;
c7bda0f4 3245 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3246 modifier = NONE;
c7bda0f4 3247 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3248 modifier = WIDEN;
3249 else
3250 return false;
3251
70439f0d
RS
3252 /* We only handle functions that do not read or clobber memory. */
3253 if (gimple_vuse (stmt))
3254 {
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3257 "function reads from or writes to memory.\n");
3258 return false;
3259 }
3260
ebfd146a
IR
3261 /* For now, we only vectorize functions if a target specific builtin
3262 is available. TODO -- in some cases, it might be profitable to
3263 insert the calls for pieces of the vector, in order to be able
3264 to vectorize other operations in the loop. */
70439f0d
RS
3265 fndecl = NULL_TREE;
3266 internal_fn ifn = IFN_LAST;
3267 combined_fn cfn = gimple_call_combined_fn (stmt);
3268 tree callee = gimple_call_fndecl (stmt);
3269
3270 /* First try using an internal function. */
b1b6836e
RS
3271 tree_code convert_code = ERROR_MARK;
3272 if (cfn != CFN_LAST
3273 && (modifier == NONE
3274 || (modifier == NARROW
3275 && simple_integer_narrowing (vectype_out, vectype_in,
3276 &convert_code))))
70439f0d
RS
3277 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3278 vectype_in);
3279
3280 /* If that fails, try asking for a target-specific built-in function. */
3281 if (ifn == IFN_LAST)
3282 {
3283 if (cfn != CFN_LAST)
3284 fndecl = targetm.vectorize.builtin_vectorized_function
3285 (cfn, vectype_out, vectype_in);
7672aa9b 3286 else if (callee)
70439f0d
RS
3287 fndecl = targetm.vectorize.builtin_md_vectorized_function
3288 (callee, vectype_out, vectype_in);
3289 }
3290
3291 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3292 {
70439f0d 3293 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3294 && !slp_node
3295 && loop_vinfo
3296 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3297 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3298 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3299 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3300 {
3301 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3302 { 0, 1, 2, ... vf - 1 } vector. */
3303 gcc_assert (nargs == 0);
3304 }
37b14185
RB
3305 else if (modifier == NONE
3306 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3307 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3308 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3309 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
68435eb2 3310 vectype_in, dt, cost_vec);
74bf76ed
JJ
3311 else
3312 {
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3315 "function is not vectorizable.\n");
74bf76ed
JJ
3316 return false;
3317 }
ebfd146a
IR
3318 }
3319
fce57248 3320 if (slp_node)
190c2236 3321 ncopies = 1;
b1b6836e 3322 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3323 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3324 else
e8f142e2 3325 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3326
3327 /* Sanity check: make sure that at least one copy of the vectorized stmt
3328 needs to be generated. */
3329 gcc_assert (ncopies >= 1);
3330
3331 if (!vec_stmt) /* transformation not required. */
3332 {
3333 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3334 DUMP_VECT_SCOPE ("vectorizable_call");
68435eb2
RB
3335 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3336 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3337 record_stmt_cost (cost_vec, ncopies / 2,
3338 vec_promote_demote, stmt_info, 0, vect_body);
b1b6836e 3339
ebfd146a
IR
3340 return true;
3341 }
3342
67b8dbac 3343 /* Transform. */
ebfd146a 3344
73fbfcad 3345 if (dump_enabled_p ())
e645e942 3346 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3347
3348 /* Handle def. */
3349 scalar_dest = gimple_call_lhs (stmt);
3350 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3351
3352 prev_stmt_info = NULL;
b1b6836e 3353 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3354 {
b1b6836e 3355 tree prev_res = NULL_TREE;
ebfd146a
IR
3356 for (j = 0; j < ncopies; ++j)
3357 {
3358 /* Build argument list for the vectorized call. */
3359 if (j == 0)
9771b263 3360 vargs.create (nargs);
ebfd146a 3361 else
9771b263 3362 vargs.truncate (0);
ebfd146a 3363
190c2236
JJ
3364 if (slp_node)
3365 {
ef062b13 3366 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3367 vec<tree> vec_oprnds0;
190c2236
JJ
3368
3369 for (i = 0; i < nargs; i++)
9771b263 3370 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3371 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3372 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3373
3374 /* Arguments are ready. Create the new vector stmt. */
9771b263 3375 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3376 {
3377 size_t k;
3378 for (k = 0; k < nargs; k++)
3379 {
37b5ec8f 3380 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3381 vargs[k] = vec_oprndsk[i];
190c2236 3382 }
b1b6836e
RS
3383 if (modifier == NARROW)
3384 {
3385 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3386 gcall *call
3387 = gimple_build_call_internal_vec (ifn, vargs);
3388 gimple_call_set_lhs (call, half_res);
3389 gimple_call_set_nothrow (call, true);
3390 new_stmt = call;
b1b6836e
RS
3391 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3392 if ((i & 1) == 0)
3393 {
3394 prev_res = half_res;
3395 continue;
3396 }
3397 new_temp = make_ssa_name (vec_dest);
3398 new_stmt = gimple_build_assign (new_temp, convert_code,
3399 prev_res, half_res);
3400 }
70439f0d 3401 else
b1b6836e 3402 {
a844293d 3403 gcall *call;
b1b6836e 3404 if (ifn != IFN_LAST)
a844293d 3405 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3406 else
a844293d
RS
3407 call = gimple_build_call_vec (fndecl, vargs);
3408 new_temp = make_ssa_name (vec_dest, call);
3409 gimple_call_set_lhs (call, new_temp);
3410 gimple_call_set_nothrow (call, true);
3411 new_stmt = call;
b1b6836e 3412 }
190c2236 3413 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3414 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3415 }
3416
3417 for (i = 0; i < nargs; i++)
3418 {
37b5ec8f 3419 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3420 vec_oprndsi.release ();
190c2236 3421 }
190c2236
JJ
3422 continue;
3423 }
3424
ebfd146a
IR
3425 for (i = 0; i < nargs; i++)
3426 {
3427 op = gimple_call_arg (stmt, i);
3428 if (j == 0)
3429 vec_oprnd0
81c40241 3430 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3431 else
63827fb8
IR
3432 {
3433 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3434 vec_oprnd0
3435 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3436 }
ebfd146a 3437
9771b263 3438 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
3439 }
3440
74bf76ed
JJ
3441 if (gimple_call_internal_p (stmt)
3442 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3443 {
c7bda0f4 3444 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3445 tree new_var
0e22bb5a 3446 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3447 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3448 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3449 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3450 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3451 }
b1b6836e
RS
3452 else if (modifier == NARROW)
3453 {
3454 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3455 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3456 gimple_call_set_lhs (call, half_res);
3457 gimple_call_set_nothrow (call, true);
3458 new_stmt = call;
b1b6836e
RS
3459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3460 if ((j & 1) == 0)
3461 {
3462 prev_res = half_res;
3463 continue;
3464 }
3465 new_temp = make_ssa_name (vec_dest);
3466 new_stmt = gimple_build_assign (new_temp, convert_code,
3467 prev_res, half_res);
3468 }
74bf76ed
JJ
3469 else
3470 {
a844293d 3471 gcall *call;
70439f0d 3472 if (ifn != IFN_LAST)
a844293d 3473 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3474 else
a844293d 3475 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3476 new_temp = make_ssa_name (vec_dest, call);
a844293d
RS
3477 gimple_call_set_lhs (call, new_temp);
3478 gimple_call_set_nothrow (call, true);
3479 new_stmt = call;
74bf76ed 3480 }
ebfd146a
IR
3481 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3482
b1b6836e 3483 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3484 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3485 else
3486 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3487
3488 prev_stmt_info = vinfo_for_stmt (new_stmt);
3489 }
b1b6836e
RS
3490 }
3491 else if (modifier == NARROW)
3492 {
ebfd146a
IR
3493 for (j = 0; j < ncopies; ++j)
3494 {
3495 /* Build argument list for the vectorized call. */
3496 if (j == 0)
9771b263 3497 vargs.create (nargs * 2);
ebfd146a 3498 else
9771b263 3499 vargs.truncate (0);
ebfd146a 3500
190c2236
JJ
3501 if (slp_node)
3502 {
ef062b13 3503 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3504 vec<tree> vec_oprnds0;
190c2236
JJ
3505
3506 for (i = 0; i < nargs; i++)
9771b263 3507 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3508 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3509 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3510
3511 /* Arguments are ready. Create the new vector stmt. */
9771b263 3512 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3513 {
3514 size_t k;
9771b263 3515 vargs.truncate (0);
190c2236
JJ
3516 for (k = 0; k < nargs; k++)
3517 {
37b5ec8f 3518 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3519 vargs.quick_push (vec_oprndsk[i]);
3520 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3521 }
a844293d 3522 gcall *call;
70439f0d 3523 if (ifn != IFN_LAST)
a844293d 3524 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3525 else
a844293d
RS
3526 call = gimple_build_call_vec (fndecl, vargs);
3527 new_temp = make_ssa_name (vec_dest, call);
3528 gimple_call_set_lhs (call, new_temp);
3529 gimple_call_set_nothrow (call, true);
3530 new_stmt = call;
190c2236 3531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3533 }
3534
3535 for (i = 0; i < nargs; i++)
3536 {
37b5ec8f 3537 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3538 vec_oprndsi.release ();
190c2236 3539 }
190c2236
JJ
3540 continue;
3541 }
3542
ebfd146a
IR
3543 for (i = 0; i < nargs; i++)
3544 {
3545 op = gimple_call_arg (stmt, i);
3546 if (j == 0)
3547 {
3548 vec_oprnd0
81c40241 3549 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3550 vec_oprnd1
63827fb8 3551 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3552 }
3553 else
3554 {
336ecb65 3555 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3556 vec_oprnd0
63827fb8 3557 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3558 vec_oprnd1
63827fb8 3559 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3560 }
3561
9771b263
DN
3562 vargs.quick_push (vec_oprnd0);
3563 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3564 }
3565
b1b6836e 3566 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3567 new_temp = make_ssa_name (vec_dest, new_stmt);
3568 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3570
3571 if (j == 0)
3572 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3573 else
3574 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3575
3576 prev_stmt_info = vinfo_for_stmt (new_stmt);
3577 }
3578
3579 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3580 }
b1b6836e
RS
3581 else
3582 /* No current target implements this case. */
3583 return false;
ebfd146a 3584
9771b263 3585 vargs.release ();
ebfd146a 3586
ebfd146a
IR
3587 /* The call in STMT might prevent it from being removed in dce.
3588 We however cannot remove it here, due to the way the ssa name
3589 it defines is mapped to the new definition. So just replace
3590 rhs of the statement with something harmless. */
3591
dd34c087
JJ
3592 if (slp_node)
3593 return true;
3594
ebfd146a 3595 type = TREE_TYPE (scalar_dest);
9d5e7640 3596 if (is_pattern_stmt_p (stmt_info))
ed7b8123
RS
3597 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3598 lhs = gimple_get_lhs (stmt_info->stmt);
3cc2fa2a 3599
9d5e7640 3600 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3601 set_vinfo_for_stmt (new_stmt, stmt_info);
ed7b8123 3602 set_vinfo_for_stmt (stmt_info->stmt, NULL);
ebfd146a
IR
3603 STMT_VINFO_STMT (stmt_info) = new_stmt;
3604 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3605
3606 return true;
3607}
3608
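For reference, the simplest shape of loop that reaches vectorizable_call above with modifier == NONE: the call maps to an internal function or target builtin whose vector form has the same number of elements in and out. The example is an assumption for illustration, not taken from this file or the testsuite:

#include <math.h>

/* With -O3 -ffast-math on a target that provides a vector square root
   (IFN_SQRT or a builtin returned by builtin_vectorized_function), the
   sqrtf call is replaced by one vector call per generated copy.  */
void
sqrt_all (float *restrict out, const float *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = sqrtf (in[i]);
}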
3609
0136f8f0
AH
3610struct simd_call_arg_info
3611{
3612 tree vectype;
3613 tree op;
0136f8f0 3614 HOST_WIDE_INT linear_step;
34e82342 3615 enum vect_def_type dt;
0136f8f0 3616 unsigned int align;
17b658af 3617 bool simd_lane_linear;
0136f8f0
AH
3618};
3619
17b658af
JJ
3620/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
 3621 is linear within a simd lane (but not within the whole loop), note it in
3622 *ARGINFO. */
3623
3624static void
3625vect_simd_lane_linear (tree op, struct loop *loop,
3626 struct simd_call_arg_info *arginfo)
3627{
355fe088 3628 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3629
3630 if (!is_gimple_assign (def_stmt)
3631 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3632 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3633 return;
3634
3635 tree base = gimple_assign_rhs1 (def_stmt);
3636 HOST_WIDE_INT linear_step = 0;
3637 tree v = gimple_assign_rhs2 (def_stmt);
3638 while (TREE_CODE (v) == SSA_NAME)
3639 {
3640 tree t;
3641 def_stmt = SSA_NAME_DEF_STMT (v);
3642 if (is_gimple_assign (def_stmt))
3643 switch (gimple_assign_rhs_code (def_stmt))
3644 {
3645 case PLUS_EXPR:
3646 t = gimple_assign_rhs2 (def_stmt);
3647 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3648 return;
3649 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3650 v = gimple_assign_rhs1 (def_stmt);
3651 continue;
3652 case MULT_EXPR:
3653 t = gimple_assign_rhs2 (def_stmt);
3654 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3655 return;
3656 linear_step = tree_to_shwi (t);
3657 v = gimple_assign_rhs1 (def_stmt);
3658 continue;
3659 CASE_CONVERT:
3660 t = gimple_assign_rhs1 (def_stmt);
3661 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3662 || (TYPE_PRECISION (TREE_TYPE (v))
3663 < TYPE_PRECISION (TREE_TYPE (t))))
3664 return;
3665 if (!linear_step)
3666 linear_step = 1;
3667 v = t;
3668 continue;
3669 default:
3670 return;
3671 }
8e4284d0 3672 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3673 && loop->simduid
3674 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3675 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3676 == loop->simduid))
3677 {
3678 if (!linear_step)
3679 linear_step = 1;
3680 arginfo->linear_step = linear_step;
3681 arginfo->op = base;
3682 arginfo->simd_lane_linear = true;
3683 return;
3684 }
3685 }
3686}
3687
cf1b2ba4
RS
3688/* Return the number of elements in vector type VECTYPE, which is associated
3689 with a SIMD clone. At present these vectors always have a constant
3690 length. */
3691
3692static unsigned HOST_WIDE_INT
3693simd_clone_subparts (tree vectype)
3694{
928686b1 3695 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3696}
3697
0136f8f0
AH
3698/* Function vectorizable_simd_clone_call.
3699
3700 Check if STMT performs a function call that can be vectorized
3701 by calling a simd clone of the function.
3702 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 3703 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3704 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3705
3706static bool
355fe088 3707vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
3708 gimple **vec_stmt, slp_tree slp_node,
3709 stmt_vector_for_cost *)
0136f8f0
AH
3710{
3711 tree vec_dest;
3712 tree scalar_dest;
3713 tree op, type;
3714 tree vec_oprnd0 = NULL_TREE;
3715 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3716 tree vectype;
3717 unsigned int nunits;
3718 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3719 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3720 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3721 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3722 tree fndecl, new_temp;
355fe088 3723 gimple *new_stmt = NULL;
0136f8f0 3724 int ncopies, j;
00426f9a 3725 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3726 vec<tree> vargs = vNULL;
3727 size_t i, nargs;
3728 tree lhs, rtype, ratype;
e7a74006 3729 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3730
3731 /* Is STMT a vectorizable call? */
3732 if (!is_gimple_call (stmt))
3733 return false;
3734
3735 fndecl = gimple_call_fndecl (stmt);
3736 if (fndecl == NULL_TREE)
3737 return false;
3738
d52f5295 3739 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3740 if (node == NULL || node->simd_clones == NULL)
3741 return false;
3742
3743 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3744 return false;
3745
66c16fd9
RB
3746 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3747 && ! vec_stmt)
0136f8f0
AH
3748 return false;
3749
3750 if (gimple_call_lhs (stmt)
3751 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3752 return false;
3753
3754 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3755
3756 vectype = STMT_VINFO_VECTYPE (stmt_info);
3757
3758 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3759 return false;
3760
3761 /* FORNOW */
fce57248 3762 if (slp_node)
0136f8f0
AH
3763 return false;
3764
3765 /* Process function arguments. */
3766 nargs = gimple_call_num_args (stmt);
3767
3768 /* Bail out if the function has zero arguments. */
3769 if (nargs == 0)
3770 return false;
3771
00426f9a 3772 arginfo.reserve (nargs, true);
0136f8f0
AH
3773
3774 for (i = 0; i < nargs; i++)
3775 {
3776 simd_call_arg_info thisarginfo;
3777 affine_iv iv;
3778
3779 thisarginfo.linear_step = 0;
3780 thisarginfo.align = 0;
3781 thisarginfo.op = NULL_TREE;
17b658af 3782 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3783
3784 op = gimple_call_arg (stmt, i);
894dd753 3785 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
81c40241 3786 &thisarginfo.vectype)
0136f8f0
AH
3787 || thisarginfo.dt == vect_uninitialized_def)
3788 {
3789 if (dump_enabled_p ())
3790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3791 "use not simple.\n");
0136f8f0
AH
3792 return false;
3793 }
3794
3795 if (thisarginfo.dt == vect_constant_def
3796 || thisarginfo.dt == vect_external_def)
3797 gcc_assert (thisarginfo.vectype == NULL_TREE);
3798 else
3799 gcc_assert (thisarginfo.vectype != NULL_TREE);
3800
6c9e85fb
JJ
3801 /* For linear arguments, the analyze phase should have saved
3802 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3803 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3804 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3805 {
3806 gcc_assert (vec_stmt);
3807 thisarginfo.linear_step
17b658af 3808 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3809 thisarginfo.op
17b658af
JJ
3810 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3811 thisarginfo.simd_lane_linear
3812 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3813 == boolean_true_node);
6c9e85fb
JJ
3814 /* If loop has been peeled for alignment, we need to adjust it. */
3815 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3816 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3817 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3818 {
3819 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3820 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3821 tree opt = TREE_TYPE (thisarginfo.op);
3822 bias = fold_convert (TREE_TYPE (step), bias);
3823 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3824 thisarginfo.op
3825 = fold_build2 (POINTER_TYPE_P (opt)
3826 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3827 thisarginfo.op, bias);
3828 }
3829 }
3830 else if (!vec_stmt
3831 && thisarginfo.dt != vect_constant_def
3832 && thisarginfo.dt != vect_external_def
3833 && loop_vinfo
3834 && TREE_CODE (op) == SSA_NAME
3835 && simple_iv (loop, loop_containing_stmt (stmt), op,
3836 &iv, false)
3837 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3838 {
3839 thisarginfo.linear_step = tree_to_shwi (iv.step);
3840 thisarginfo.op = iv.base;
3841 }
3842 else if ((thisarginfo.dt == vect_constant_def
3843 || thisarginfo.dt == vect_external_def)
3844 && POINTER_TYPE_P (TREE_TYPE (op)))
3845 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3846 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3847 linear too. */
3848 if (POINTER_TYPE_P (TREE_TYPE (op))
3849 && !thisarginfo.linear_step
3850 && !vec_stmt
3851 && thisarginfo.dt != vect_constant_def
3852 && thisarginfo.dt != vect_external_def
3853 && loop_vinfo
3854 && !slp_node
3855 && TREE_CODE (op) == SSA_NAME)
3856 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3857
3858 arginfo.quick_push (thisarginfo);
3859 }
3860
d9f21f6a
RS
3861 unsigned HOST_WIDE_INT vf;
3862 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3863 {
3864 if (dump_enabled_p ())
3865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3866 "not considering SIMD clones; not yet supported"
3867 " for variable-width vectors.\n");
 3868 return false;
3869 }
3870
0136f8f0
AH
3871 unsigned int badness = 0;
3872 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3873 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3874 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3875 else
3876 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3877 n = n->simdclone->next_clone)
3878 {
3879 unsigned int this_badness = 0;
d9f21f6a 3880 if (n->simdclone->simdlen > vf
0136f8f0
AH
3881 || n->simdclone->nargs != nargs)
3882 continue;
d9f21f6a
RS
3883 if (n->simdclone->simdlen < vf)
3884 this_badness += (exact_log2 (vf)
0136f8f0
AH
3885 - exact_log2 (n->simdclone->simdlen)) * 1024;
3886 if (n->simdclone->inbranch)
3887 this_badness += 2048;
3888 int target_badness = targetm.simd_clone.usable (n);
3889 if (target_badness < 0)
3890 continue;
3891 this_badness += target_badness * 512;
3892 /* FORNOW: Have to add code to add the mask argument. */
3893 if (n->simdclone->inbranch)
3894 continue;
3895 for (i = 0; i < nargs; i++)
3896 {
3897 switch (n->simdclone->args[i].arg_type)
3898 {
3899 case SIMD_CLONE_ARG_TYPE_VECTOR:
3900 if (!useless_type_conversion_p
3901 (n->simdclone->args[i].orig_type,
3902 TREE_TYPE (gimple_call_arg (stmt, i))))
3903 i = -1;
3904 else if (arginfo[i].dt == vect_constant_def
3905 || arginfo[i].dt == vect_external_def
3906 || arginfo[i].linear_step)
3907 this_badness += 64;
3908 break;
3909 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3910 if (arginfo[i].dt != vect_constant_def
3911 && arginfo[i].dt != vect_external_def)
3912 i = -1;
3913 break;
3914 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3915 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3916 if (arginfo[i].dt == vect_constant_def
3917 || arginfo[i].dt == vect_external_def
3918 || (arginfo[i].linear_step
3919 != n->simdclone->args[i].linear_step))
3920 i = -1;
3921 break;
3922 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3923 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3928 /* FORNOW */
3929 i = -1;
3930 break;
3931 case SIMD_CLONE_ARG_TYPE_MASK:
3932 gcc_unreachable ();
3933 }
3934 if (i == (size_t) -1)
3935 break;
3936 if (n->simdclone->args[i].alignment > arginfo[i].align)
3937 {
3938 i = -1;
3939 break;
3940 }
3941 if (arginfo[i].align)
3942 this_badness += (exact_log2 (arginfo[i].align)
3943 - exact_log2 (n->simdclone->args[i].alignment));
3944 }
3945 if (i == (size_t) -1)
3946 continue;
3947 if (bestn == NULL || this_badness < badness)
3948 {
3949 bestn = n;
3950 badness = this_badness;
3951 }
3952 }
3953
3954 if (bestn == NULL)
00426f9a 3955 return false;
0136f8f0
AH
3956
3957 for (i = 0; i < nargs; i++)
3958 if ((arginfo[i].dt == vect_constant_def
3959 || arginfo[i].dt == vect_external_def)
3960 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3961 {
3962 arginfo[i].vectype
3963 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3964 i)));
3965 if (arginfo[i].vectype == NULL
cf1b2ba4 3966 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3967 > bestn->simdclone->simdlen))
00426f9a 3968 return false;
0136f8f0
AH
3969 }
3970
3971 fndecl = bestn->decl;
3972 nunits = bestn->simdclone->simdlen;
d9f21f6a 3973 ncopies = vf / nunits;
0136f8f0
AH
3974
 3975 /* If the function isn't const, only allow it in simd loops where the user
3976 has asserted that at least nunits consecutive iterations can be
3977 performed using SIMD instructions. */
3978 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3979 && gimple_vuse (stmt))
00426f9a 3980 return false;
0136f8f0
AH
3981
3982 /* Sanity check: make sure that at least one copy of the vectorized stmt
3983 needs to be generated. */
3984 gcc_assert (ncopies >= 1);
3985
3986 if (!vec_stmt) /* transformation not required. */
3987 {
6c9e85fb
JJ
3988 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3989 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3990 if ((bestn->simdclone->args[i].arg_type
3991 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3992 || (bestn->simdclone->args[i].arg_type
3993 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3994 {
17b658af 3995 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3996 + 1);
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3998 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3999 ? size_type_node : TREE_TYPE (arginfo[i].op);
4000 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4001 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
4002 tree sll = arginfo[i].simd_lane_linear
4003 ? boolean_true_node : boolean_false_node;
4004 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 4005 }
0136f8f0 4006 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
adac3a68 4007 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
68435eb2 4008/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
0136f8f0
AH
4009 return true;
4010 }
4011
67b8dbac 4012 /* Transform. */
0136f8f0
AH
4013
4014 if (dump_enabled_p ())
4015 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4016
4017 /* Handle def. */
4018 scalar_dest = gimple_call_lhs (stmt);
4019 vec_dest = NULL_TREE;
4020 rtype = NULL_TREE;
4021 ratype = NULL_TREE;
4022 if (scalar_dest)
4023 {
4024 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4025 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4026 if (TREE_CODE (rtype) == ARRAY_TYPE)
4027 {
4028 ratype = rtype;
4029 rtype = TREE_TYPE (ratype);
4030 }
4031 }
4032
4033 prev_stmt_info = NULL;
4034 for (j = 0; j < ncopies; ++j)
4035 {
4036 /* Build argument list for the vectorized call. */
4037 if (j == 0)
4038 vargs.create (nargs);
4039 else
4040 vargs.truncate (0);
4041
4042 for (i = 0; i < nargs; i++)
4043 {
4044 unsigned int k, l, m, o;
4045 tree atype;
4046 op = gimple_call_arg (stmt, i);
4047 switch (bestn->simdclone->args[i].arg_type)
4048 {
4049 case SIMD_CLONE_ARG_TYPE_VECTOR:
4050 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 4051 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
4052 for (m = j * o; m < (j + 1) * o; m++)
4053 {
cf1b2ba4
RS
4054 if (simd_clone_subparts (atype)
4055 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 4056 {
73a699ae 4057 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
4058 k = (simd_clone_subparts (arginfo[i].vectype)
4059 / simd_clone_subparts (atype));
0136f8f0
AH
4060 gcc_assert ((k & (k - 1)) == 0);
4061 if (m == 0)
4062 vec_oprnd0
81c40241 4063 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4064 else
4065 {
4066 vec_oprnd0 = arginfo[i].op;
4067 if ((m & (k - 1)) == 0)
4068 vec_oprnd0
4069 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4070 vec_oprnd0);
4071 }
4072 arginfo[i].op = vec_oprnd0;
4073 vec_oprnd0
4074 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 4075 bitsize_int (prec),
0136f8f0
AH
4076 bitsize_int ((m & (k - 1)) * prec));
4077 new_stmt
b731b390 4078 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4079 vec_oprnd0);
4080 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4081 vargs.safe_push (gimple_assign_lhs (new_stmt));
4082 }
4083 else
4084 {
cf1b2ba4
RS
4085 k = (simd_clone_subparts (atype)
4086 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
4087 gcc_assert ((k & (k - 1)) == 0);
4088 vec<constructor_elt, va_gc> *ctor_elts;
4089 if (k != 1)
4090 vec_alloc (ctor_elts, k);
4091 else
4092 ctor_elts = NULL;
4093 for (l = 0; l < k; l++)
4094 {
4095 if (m == 0 && l == 0)
4096 vec_oprnd0
81c40241 4097 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4098 else
4099 vec_oprnd0
4100 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4101 arginfo[i].op);
4102 arginfo[i].op = vec_oprnd0;
4103 if (k == 1)
4104 break;
4105 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4106 vec_oprnd0);
4107 }
4108 if (k == 1)
4109 vargs.safe_push (vec_oprnd0);
4110 else
4111 {
4112 vec_oprnd0 = build_constructor (atype, ctor_elts);
4113 new_stmt
b731b390 4114 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4115 vec_oprnd0);
4116 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4117 vargs.safe_push (gimple_assign_lhs (new_stmt));
4118 }
4119 }
4120 }
4121 break;
4122 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4123 vargs.safe_push (op);
4124 break;
4125 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4126 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4127 if (j == 0)
4128 {
4129 gimple_seq stmts;
4130 arginfo[i].op
4131 = force_gimple_operand (arginfo[i].op, &stmts, true,
4132 NULL_TREE);
4133 if (stmts != NULL)
4134 {
4135 basic_block new_bb;
4136 edge pe = loop_preheader_edge (loop);
4137 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4138 gcc_assert (!new_bb);
4139 }
17b658af
JJ
4140 if (arginfo[i].simd_lane_linear)
4141 {
4142 vargs.safe_push (arginfo[i].op);
4143 break;
4144 }
b731b390 4145 tree phi_res = copy_ssa_name (op);
538dd0b7 4146 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 4147 set_vinfo_for_stmt (new_phi,
310213d4 4148 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
4149 add_phi_arg (new_phi, arginfo[i].op,
4150 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4151 enum tree_code code
4152 = POINTER_TYPE_P (TREE_TYPE (op))
4153 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4154 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4155 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4156 widest_int cst
4157 = wi::mul (bestn->simdclone->args[i].linear_step,
4158 ncopies * nunits);
4159 tree tcst = wide_int_to_tree (type, cst);
b731b390 4160 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
4161 new_stmt
4162 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4163 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4164 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4165 set_vinfo_for_stmt (new_stmt,
310213d4 4166 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
4167 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4168 UNKNOWN_LOCATION);
4169 arginfo[i].op = phi_res;
4170 vargs.safe_push (phi_res);
4171 }
4172 else
4173 {
4174 enum tree_code code
4175 = POINTER_TYPE_P (TREE_TYPE (op))
4176 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4177 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4178 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4179 widest_int cst
4180 = wi::mul (bestn->simdclone->args[i].linear_step,
4181 j * nunits);
4182 tree tcst = wide_int_to_tree (type, cst);
b731b390 4183 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
4184 new_stmt = gimple_build_assign (new_temp, code,
4185 arginfo[i].op, tcst);
0136f8f0
AH
4186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4187 vargs.safe_push (new_temp);
4188 }
4189 break;
7adb26f2
JJ
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4191 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4196 default:
4197 gcc_unreachable ();
4198 }
4199 }
4200
4201 new_stmt = gimple_build_call_vec (fndecl, vargs);
4202 if (vec_dest)
4203 {
cf1b2ba4 4204 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4205 if (ratype)
b731b390 4206 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4207 else if (simd_clone_subparts (vectype)
4208 == simd_clone_subparts (rtype))
0136f8f0
AH
4209 new_temp = make_ssa_name (vec_dest, new_stmt);
4210 else
4211 new_temp = make_ssa_name (rtype, new_stmt);
4212 gimple_call_set_lhs (new_stmt, new_temp);
4213 }
4214 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4215
4216 if (vec_dest)
4217 {
cf1b2ba4 4218 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4219 {
4220 unsigned int k, l;
73a699ae
RS
4221 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4222 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4223 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4224 gcc_assert ((k & (k - 1)) == 0);
4225 for (l = 0; l < k; l++)
4226 {
4227 tree t;
4228 if (ratype)
4229 {
4230 t = build_fold_addr_expr (new_temp);
4231 t = build2 (MEM_REF, vectype, t,
73a699ae 4232 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4233 }
4234 else
4235 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4236 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 4237 new_stmt
b731b390 4238 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
4239 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4240 if (j == 0 && l == 0)
4241 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4242 else
4243 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4244
4245 prev_stmt_info = vinfo_for_stmt (new_stmt);
4246 }
4247
4248 if (ratype)
3ba4ff41 4249 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4250 continue;
4251 }
cf1b2ba4 4252 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4253 {
cf1b2ba4
RS
4254 unsigned int k = (simd_clone_subparts (vectype)
4255 / simd_clone_subparts (rtype));
0136f8f0
AH
4256 gcc_assert ((k & (k - 1)) == 0);
4257 if ((j & (k - 1)) == 0)
4258 vec_alloc (ret_ctor_elts, k);
4259 if (ratype)
4260 {
cf1b2ba4 4261 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4262 for (m = 0; m < o; m++)
4263 {
4264 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4265 size_int (m), NULL_TREE, NULL_TREE);
4266 new_stmt
b731b390 4267 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
4268 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4269 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4270 gimple_assign_lhs (new_stmt));
4271 }
3ba4ff41 4272 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4273 }
4274 else
4275 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4276 if ((j & (k - 1)) != k - 1)
4277 continue;
4278 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4279 new_stmt
b731b390 4280 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
4281 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4282
4283 if ((unsigned) j == k - 1)
4284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4285 else
4286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4287
4288 prev_stmt_info = vinfo_for_stmt (new_stmt);
4289 continue;
4290 }
4291 else if (ratype)
4292 {
4293 tree t = build_fold_addr_expr (new_temp);
4294 t = build2 (MEM_REF, vectype, t,
4295 build_int_cst (TREE_TYPE (t), 0));
4296 new_stmt
b731b390 4297 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0 4298 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41 4299 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4300 }
4301 }
4302
4303 if (j == 0)
4304 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4305 else
4306 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4307
4308 prev_stmt_info = vinfo_for_stmt (new_stmt);
4309 }
4310
4311 vargs.release ();
4312
4313 /* The call in STMT might prevent it from being removed in dce.
4314 We however cannot remove it here, due to the way the ssa name
4315 it defines is mapped to the new definition. So just replace
4316 rhs of the statement with something harmless. */
4317
4318 if (slp_node)
4319 return true;
4320
4321 if (scalar_dest)
4322 {
4323 type = TREE_TYPE (scalar_dest);
4324 if (is_pattern_stmt_p (stmt_info))
4325 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4326 else
4327 lhs = gimple_call_lhs (stmt);
4328 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4329 }
4330 else
4331 new_stmt = gimple_build_nop ();
4332 set_vinfo_for_stmt (new_stmt, stmt_info);
4333 set_vinfo_for_stmt (stmt, NULL);
4334 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4335 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4336 unlink_stmt_vdef (stmt);
4337
4338 return true;
4339}
4340
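vectorizable_simd_clone_call above matches a call against the SIMD clones created for an OpenMP declare-simd function, scoring each clone by simdlen versus the vectorization factor, inbranch, and the classification and alignment of its arguments, then keeping the least-bad candidate. A minimal example of source that produces such clones and such a call site (assumed, standard OpenMP syntax, compiled with -fopenmp-simd -O3):

/* base and scale are uniform arguments of the clone, i is linear with
   step 1, so the call in the simd loop below is a candidate for
   vectorizable_simd_clone_call.  */
#pragma omp declare simd uniform(base, scale) linear(i:1) notinbranch
float
scaled (const float *base, int i, float scale)
{
  return base[i] * scale;
}

void
apply (float *restrict out, const float *restrict base, float scale, int n)
{
  #pragma omp simd
  for (int i = 0; i < n; i++)
    out[i] = scaled (base, i, scale);
}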
4341
ebfd146a
IR
4342/* Function vect_gen_widened_results_half
4343
 4344 Create a vector stmt whose code is CODE, whose number of operands is
b8698a0f 4345 OP_TYPE, and whose result variable is VEC_DEST, and whose arguments are
ff802fa1 4346 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
ebfd146a
IR
4347 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4348 needs to be created (DECL is a function-decl of a target-builtin).
4349 STMT is the original scalar stmt that we are vectorizing. */
4350
355fe088 4351static gimple *
ebfd146a
IR
4352vect_gen_widened_results_half (enum tree_code code,
4353 tree decl,
4354 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4355 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4356 gimple *stmt)
b8698a0f 4357{
355fe088 4358 gimple *new_stmt;
b8698a0f
L
4359 tree new_temp;
4360
4361 /* Generate half of the widened result: */
4362 if (code == CALL_EXPR)
4363 {
4364 /* Target specific support */
ebfd146a
IR
4365 if (op_type == binary_op)
4366 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4367 else
4368 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4369 new_temp = make_ssa_name (vec_dest, new_stmt);
4370 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4371 }
4372 else
ebfd146a 4373 {
b8698a0f
L
4374 /* Generic support */
4375 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4376 if (op_type != binary_op)
4377 vec_oprnd1 = NULL;
0d0e4a03 4378 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4379 new_temp = make_ssa_name (vec_dest, new_stmt);
4380 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4381 }
ebfd146a
IR
4382 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4383
ebfd146a
IR
4384 return new_stmt;
4385}
4386
4a00c761
JJ
4387
4388/* Get vectorized definitions for loop-based vectorization. For the first
4389 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4390 scalar operand), and for the rest we get a copy with
4391 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4392 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4393 The vectors are collected into VEC_OPRNDS. */
4394
4395static void
355fe088 4396vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4397 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4398{
4399 tree vec_oprnd;
4400
4401 /* Get first vector operand. */
4402 /* All the vector operands except the very first one (that is scalar oprnd)
4403 are stmt copies. */
4404 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4405 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4406 else
4407 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4408
9771b263 4409 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4410
4411 /* Get second vector operand. */
4412 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4413 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4414
4415 *oprnd = vec_oprnd;
4416
4417 /* For conversion in multiple steps, continue to get operands
4418 recursively. */
4419 if (multi_step_cvt)
4420 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4421}
4422
4423
4424/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4425 For multi-step conversions store the resulting vectors and call the function
4426 recursively. */
4427
4428static void
9771b263 4429vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4430 int multi_step_cvt, gimple *stmt,
9771b263 4431 vec<tree> vec_dsts,
4a00c761
JJ
4432 gimple_stmt_iterator *gsi,
4433 slp_tree slp_node, enum tree_code code,
4434 stmt_vec_info *prev_stmt_info)
4435{
4436 unsigned int i;
4437 tree vop0, vop1, new_tmp, vec_dest;
355fe088 4438 gimple *new_stmt;
4a00c761
JJ
4439 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4440
9771b263 4441 vec_dest = vec_dsts.pop ();
4a00c761 4442
9771b263 4443 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4444 {
4445 /* Create demotion operation. */
9771b263
DN
4446 vop0 = (*vec_oprnds)[i];
4447 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4448 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4449 new_tmp = make_ssa_name (vec_dest, new_stmt);
4450 gimple_assign_set_lhs (new_stmt, new_tmp);
4451 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4452
4453 if (multi_step_cvt)
4454 /* Store the resulting vector for next recursive call. */
9771b263 4455 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4456 else
4457 {
4458 /* This is the last step of the conversion sequence. Store the
4459 vectors in SLP_NODE or in vector info of the scalar statement
4460 (or in STMT_VINFO_RELATED_STMT chain). */
4461 if (slp_node)
9771b263 4462 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4463 else
c689ce1e
RB
4464 {
4465 if (!*prev_stmt_info)
4466 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4467 else
4468 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4469
c689ce1e
RB
4470 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4471 }
4a00c761
JJ
4472 }
4473 }
4474
4475 /* For multi-step demotion operations we first generate demotion operations
4476 from the source type to the intermediate types, and then combine the
 4477 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4478 type. */
4479 if (multi_step_cvt)
4480 {
4481 /* At each level of recursion we have half of the operands we had at the
4482 previous level. */
9771b263 4483 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4484 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4485 stmt, vec_dsts, gsi, slp_node,
4486 VEC_PACK_TRUNC_EXPR,
4487 prev_stmt_info);
4488 }
4489
9771b263 4490 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4491}
4492
4493
4494/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4495 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4496 the resulting vectors and call the function recursively. */
4497
4498static void
9771b263
DN
4499vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4500 vec<tree> *vec_oprnds1,
355fe088 4501 gimple *stmt, tree vec_dest,
4a00c761
JJ
4502 gimple_stmt_iterator *gsi,
4503 enum tree_code code1,
4504 enum tree_code code2, tree decl1,
4505 tree decl2, int op_type)
4506{
4507 int i;
4508 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4509 gimple *new_stmt1, *new_stmt2;
6e1aa848 4510 vec<tree> vec_tmp = vNULL;
4a00c761 4511
9771b263
DN
4512 vec_tmp.create (vec_oprnds0->length () * 2);
4513 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4514 {
4515 if (op_type == binary_op)
9771b263 4516 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4517 else
4518 vop1 = NULL_TREE;
4519
4520 /* Generate the two halves of promotion operation. */
4521 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4522 op_type, vec_dest, gsi, stmt);
4523 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4524 op_type, vec_dest, gsi, stmt);
4525 if (is_gimple_call (new_stmt1))
4526 {
4527 new_tmp1 = gimple_call_lhs (new_stmt1);
4528 new_tmp2 = gimple_call_lhs (new_stmt2);
4529 }
4530 else
4531 {
4532 new_tmp1 = gimple_assign_lhs (new_stmt1);
4533 new_tmp2 = gimple_assign_lhs (new_stmt2);
4534 }
4535
4536 /* Store the results for the next step. */
9771b263
DN
4537 vec_tmp.quick_push (new_tmp1);
4538 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4539 }
4540
689eaba3 4541 vec_oprnds0->release ();
4a00c761
JJ
4542 *vec_oprnds0 = vec_tmp;
4543}
4544
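vect_create_vectorized_promotion_stmts above emits the two halves of a widening operation: each input vector is expanded by a lo/hi pair of stmts (CODE1/CODE2, typically VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR), so every input vector yields two result vectors. The kind of scalar loop that takes the WIDEN path of vectorizable_conversion below (an assumed example):

/* A widening NOP conversion: vectype_in has twice as many 16-bit elements
   as vectype_out has 32-bit elements, so each input vector is converted
   in a lo half and a hi half.  */
void
widen (int *restrict out, const short *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = in[i];
}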
4545
b8698a0f
L
4546/* Check if STMT performs a conversion operation that can be vectorized.
4547 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4548 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4549 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4550
4551static bool
355fe088 4552vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
4553 gimple **vec_stmt, slp_tree slp_node,
4554 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4555{
4556 tree vec_dest;
4557 tree scalar_dest;
4a00c761 4558 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4559 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4560 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4561 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4562 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4563 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4564 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4565 tree new_temp;
ebfd146a 4566 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4567 int ndts = 2;
355fe088 4568 gimple *new_stmt = NULL;
ebfd146a 4569 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4570 poly_uint64 nunits_in;
4571 poly_uint64 nunits_out;
ebfd146a 4572 tree vectype_out, vectype_in;
4a00c761
JJ
4573 int ncopies, i, j;
4574 tree lhs_type, rhs_type;
ebfd146a 4575 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4576 vec<tree> vec_oprnds0 = vNULL;
4577 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4578 tree vop0;
4a00c761 4579 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4580 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4581 int multi_step_cvt = 0;
6e1aa848 4582 vec<tree> interm_types = vNULL;
4a00c761
JJ
4583 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4584 int op_type;
4a00c761 4585 unsigned short fltsz;
ebfd146a
IR
4586
4587 /* Is STMT a vectorizable conversion? */
4588
4a00c761 4589 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4590 return false;
4591
66c16fd9
RB
4592 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4593 && ! vec_stmt)
ebfd146a
IR
4594 return false;
4595
4596 if (!is_gimple_assign (stmt))
4597 return false;
4598
4599 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4600 return false;
4601
4602 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4603 if (!CONVERT_EXPR_CODE_P (code)
4604 && code != FIX_TRUNC_EXPR
4605 && code != FLOAT_EXPR
4606 && code != WIDEN_MULT_EXPR
4607 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4608 return false;
4609
4a00c761
JJ
4610 op_type = TREE_CODE_LENGTH (code);
4611
ebfd146a 4612 /* Check types of lhs and rhs. */
b690cc0f 4613 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4614 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4615 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4616
ebfd146a
IR
4617 op0 = gimple_assign_rhs1 (stmt);
4618 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4619
4620 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4621 && !((INTEGRAL_TYPE_P (lhs_type)
4622 && INTEGRAL_TYPE_P (rhs_type))
4623 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4624 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4625 return false;
4626
e6f5c25d
IE
4627 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4628 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4629 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4630 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4631 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4632 {
73fbfcad 4633 if (dump_enabled_p ())
78c60e3d 4634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4635 "type conversion to/from bit-precision unsupported."
4636 "\n");
4a00c761
JJ
4637 return false;
4638 }
4639
b690cc0f 4640 /* Check the operands of the operation. */
894dd753 4641 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
b690cc0f 4642 {
73fbfcad 4643 if (dump_enabled_p ())
78c60e3d 4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4645 "use not simple.\n");
b690cc0f
RG
4646 return false;
4647 }
4a00c761
JJ
4648 if (op_type == binary_op)
4649 {
4650 bool ok;
4651
4652 op1 = gimple_assign_rhs2 (stmt);
4653 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4654 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4655 OP1. */
4656 if (CONSTANT_CLASS_P (op0))
894dd753 4657 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4a00c761 4658 else
894dd753 4659 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4a00c761
JJ
4660
4661 if (!ok)
4662 {
73fbfcad 4663 if (dump_enabled_p ())
78c60e3d 4664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4665 "use not simple.\n");
4a00c761
JJ
4666 return false;
4667 }
4668 }
4669
b690cc0f
RG
4670 /* If op0 is an external or constant defs use a vector type of
4671 the same size as the output vector type. */
ebfd146a 4672 if (!vectype_in)
b690cc0f 4673 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4674 if (vec_stmt)
4675 gcc_assert (vectype_in);
4676 if (!vectype_in)
4677 {
73fbfcad 4678 if (dump_enabled_p ())
4a00c761 4679 {
78c60e3d
SS
4680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4681 "no vectype for scalar type ");
4682 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4683 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4684 }
7d8930a0
IR
4685
4686 return false;
4687 }
ebfd146a 4688
e6f5c25d
IE
4689 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4690 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4691 {
4692 if (dump_enabled_p ())
4693 {
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4695 "can't convert between boolean and non "
4696 "boolean vectors");
4697 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4698 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4699 }
4700
4701 return false;
4702 }
4703
b690cc0f
RG
4704 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4705 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4706 if (known_eq (nunits_out, nunits_in))
ebfd146a 4707 modifier = NONE;
062d5ccc
RS
4708 else if (multiple_p (nunits_out, nunits_in))
4709 modifier = NARROW;
ebfd146a 4710 else
062d5ccc
RS
4711 {
4712 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4713 modifier = WIDEN;
4714 }
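  /* Illustrative note (added comment, not part of the original source):
     for a conversion such as int -> short the output vector has more,
     narrower lanes than the input vector, hence NARROW; for short -> int
     it has fewer, wider lanes, hence WIDEN; a same-width conversion such
     as int -> float is NONE.  */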
ebfd146a 4715
ff802fa1
IR
4716 /* Multiple types in SLP are handled by creating the appropriate number of
4717 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4718 case of SLP. */
fce57248 4719 if (slp_node)
ebfd146a 4720 ncopies = 1;
4a00c761 4721 else if (modifier == NARROW)
e8f142e2 4722 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4723 else
e8f142e2 4724 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4725
ebfd146a
IR
4726 /* Sanity check: make sure that at least one copy of the vectorized stmt
4727 needs to be generated. */
4728 gcc_assert (ncopies >= 1);
4729
16d22000
RS
4730 bool found_mode = false;
4731 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4732 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4733 opt_scalar_mode rhs_mode_iter;
b397965c 4734
ebfd146a 4735 /* Supportable by target? */
4a00c761 4736 switch (modifier)
ebfd146a 4737 {
4a00c761
JJ
4738 case NONE:
4739 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4740 return false;
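      /* Illustrative note (added comment): only conversions between
	 integer and floating-point vectors with the same number of lanes,
	 e.g. int -> float or float -> int, are handled here; plain
	 same-width copies and casts go through vectorizable_assignment
	 instead.  */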
4741 if (supportable_convert_operation (code, vectype_out, vectype_in,
4742 &decl1, &code1))
4743 break;
4744 /* FALLTHRU */
4745 unsupported:
73fbfcad 4746 if (dump_enabled_p ())
78c60e3d 4747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4748 "conversion not supported by target.\n");
ebfd146a 4749 return false;
ebfd146a 4750
4a00c761
JJ
4751 case WIDEN:
4752 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4753 &code1, &code2, &multi_step_cvt,
4754 &interm_types))
4a00c761
JJ
4755 {
4756 /* Binary widening operation can only be supported directly by the
4757 architecture. */
4758 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4759 break;
4760 }
4761
4762 if (code != FLOAT_EXPR
b397965c 4763 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4764 goto unsupported;
4765
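      /* Illustrative note (added comment): for e.g. a short -> double
	 FLOAT_EXPR this loop searches for a wider integer type, up to the
	 width of the float type, that the input can first be widened to
	 and that can then be converted to the float type, recording any
	 intermediate step in INTERM_TYPES.  */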
b397965c 4766 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4767 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4768 {
16d22000 4769 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4770 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4771 break;
4772
4a00c761
JJ
4773 cvt_type
4774 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4775 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4776 if (cvt_type == NULL_TREE)
4777 goto unsupported;
4778
4779 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4780 {
4781 if (!supportable_convert_operation (code, vectype_out,
4782 cvt_type, &decl1, &codecvt1))
4783 goto unsupported;
4784 }
4785 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4786 cvt_type, &codecvt1,
4787 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4788 &interm_types))
4789 continue;
4790 else
4791 gcc_assert (multi_step_cvt == 0);
4792
4793 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4794 vectype_in, &code1, &code2,
4795 &multi_step_cvt, &interm_types))
16d22000
RS
4796 {
4797 found_mode = true;
4798 break;
4799 }
4a00c761
JJ
4800 }
4801
16d22000 4802 if (!found_mode)
4a00c761
JJ
4803 goto unsupported;
4804
4805 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4806 codecvt2 = ERROR_MARK;
4807 else
4808 {
4809 multi_step_cvt++;
9771b263 4810 interm_types.safe_push (cvt_type);
4a00c761
JJ
4811 cvt_type = NULL_TREE;
4812 }
4813 break;
4814
4815 case NARROW:
4816 gcc_assert (op_type == unary_op);
4817 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4818 &code1, &multi_step_cvt,
4819 &interm_types))
4820 break;
4821
4822 if (code != FIX_TRUNC_EXPR
b397965c 4823 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4824 goto unsupported;
4825
4a00c761
JJ
4826 cvt_type
4827 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4828 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4829 if (cvt_type == NULL_TREE)
4830 goto unsupported;
4831 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4832 &decl1, &codecvt1))
4833 goto unsupported;
4834 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4835 &code1, &multi_step_cvt,
4836 &interm_types))
4837 break;
4838 goto unsupported;
4839
4840 default:
4841 gcc_unreachable ();
ebfd146a
IR
4842 }
4843
4844 if (!vec_stmt) /* transformation not required. */
4845 {
adac3a68 4846 DUMP_VECT_SCOPE ("vectorizable_conversion");
4a00c761 4847 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4848 {
4849 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4850 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4851 cost_vec);
8bd37302 4852 }
4a00c761
JJ
4853 else if (modifier == NARROW)
4854 {
4855 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4856 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4857 cost_vec);
4a00c761
JJ
4858 }
4859 else
4860 {
4861 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4862 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4863 cost_vec);
4a00c761 4864 }
9771b263 4865 interm_types.release ();
ebfd146a
IR
4866 return true;
4867 }
4868
67b8dbac 4869 /* Transform. */
73fbfcad 4870 if (dump_enabled_p ())
78c60e3d 4871 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4872 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4873
4a00c761
JJ
4874 if (op_type == binary_op)
4875 {
4876 if (CONSTANT_CLASS_P (op0))
4877 op0 = fold_convert (TREE_TYPE (op1), op0);
4878 else if (CONSTANT_CLASS_P (op1))
4879 op1 = fold_convert (TREE_TYPE (op0), op1);
4880 }
4881
4882 /* In case of multi-step conversion, we first generate conversion operations
 4883 to the intermediate types, and then from those types to the final one.
4884 We create vector destinations for the intermediate type (TYPES) received
4885 from supportable_*_operation, and store them in the correct order
4886 for future use in vect_create_vectorized_*_stmts (). */
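  /* Illustrative example (added comment): a double -> short conversion may
     be carried out this way: first a vector FIX_TRUNC_EXPR from double to
     a same-width integer type, then one or more narrowing steps down to
     short.  */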
8c681247 4887 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4888 vec_dest = vect_create_destination_var (scalar_dest,
4889 (cvt_type && modifier == WIDEN)
4890 ? cvt_type : vectype_out);
9771b263 4891 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4892
4893 if (multi_step_cvt)
4894 {
9771b263
DN
4895 for (i = interm_types.length () - 1;
4896 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4897 {
4898 vec_dest = vect_create_destination_var (scalar_dest,
4899 intermediate_type);
9771b263 4900 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4901 }
4902 }
ebfd146a 4903
4a00c761 4904 if (cvt_type)
82294ec1
JJ
4905 vec_dest = vect_create_destination_var (scalar_dest,
4906 modifier == WIDEN
4907 ? vectype_out : cvt_type);
4a00c761
JJ
4908
4909 if (!slp_node)
4910 {
30862efc 4911 if (modifier == WIDEN)
4a00c761 4912 {
c3284718 4913 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4914 if (op_type == binary_op)
9771b263 4915 vec_oprnds1.create (1);
4a00c761 4916 }
30862efc 4917 else if (modifier == NARROW)
9771b263
DN
4918 vec_oprnds0.create (
4919 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4920 }
4921 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4922 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4923
4a00c761 4924 last_oprnd = op0;
ebfd146a
IR
4925 prev_stmt_info = NULL;
4926 switch (modifier)
4927 {
4928 case NONE:
4929 for (j = 0; j < ncopies; j++)
4930 {
ebfd146a 4931 if (j == 0)
306b0c92 4932 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4933 else
4934 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4935
9771b263 4936 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4937 {
4938 /* Arguments are ready, create the new vector stmt. */
4939 if (code1 == CALL_EXPR)
4940 {
4941 new_stmt = gimple_build_call (decl1, 1, vop0);
4942 new_temp = make_ssa_name (vec_dest, new_stmt);
4943 gimple_call_set_lhs (new_stmt, new_temp);
4944 }
4945 else
4946 {
4947 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4948 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4949 new_temp = make_ssa_name (vec_dest, new_stmt);
4950 gimple_assign_set_lhs (new_stmt, new_temp);
4951 }
4952
4953 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4954 if (slp_node)
9771b263 4955 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4956 else
4957 {
4958 if (!prev_stmt_info)
4959 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4960 else
4961 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4962 prev_stmt_info = vinfo_for_stmt (new_stmt);
4963 }
4a00c761 4964 }
ebfd146a
IR
4965 }
4966 break;
4967
4968 case WIDEN:
4969 /* In case the vectorization factor (VF) is bigger than the number
4970 of elements that we can fit in a vectype (nunits), we have to
 4971 generate more than one vector stmt - i.e. - we need to "unroll"
4972 the vector stmt by a factor VF/nunits. */
4973 for (j = 0; j < ncopies; j++)
4974 {
4a00c761 4975 /* Handle uses. */
ebfd146a 4976 if (j == 0)
4a00c761
JJ
4977 {
4978 if (slp_node)
4979 {
4980 if (code == WIDEN_LSHIFT_EXPR)
4981 {
4982 unsigned int k;
ebfd146a 4983
4a00c761
JJ
4984 vec_oprnd1 = op1;
4985 /* Store vec_oprnd1 for every vector stmt to be created
4986 for SLP_NODE. We check during the analysis that all
4987 the shift arguments are the same. */
4988 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4989 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4990
4991 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4992 slp_node);
4a00c761
JJ
4993 }
4994 else
4995 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4996 &vec_oprnds1, slp_node);
4a00c761
JJ
4997 }
4998 else
4999 {
81c40241 5000 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 5001 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5002 if (op_type == binary_op)
5003 {
5004 if (code == WIDEN_LSHIFT_EXPR)
5005 vec_oprnd1 = op1;
5006 else
81c40241 5007 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 5008 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5009 }
5010 }
5011 }
ebfd146a 5012 else
4a00c761
JJ
5013 {
5014 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
5015 vec_oprnds0.truncate (0);
5016 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5017 if (op_type == binary_op)
5018 {
5019 if (code == WIDEN_LSHIFT_EXPR)
5020 vec_oprnd1 = op1;
5021 else
5022 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5023 vec_oprnd1);
9771b263
DN
5024 vec_oprnds1.truncate (0);
5025 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5026 }
5027 }
ebfd146a 5028
4a00c761
JJ
5029 /* Arguments are ready. Create the new vector stmts. */
5030 for (i = multi_step_cvt; i >= 0; i--)
5031 {
9771b263 5032 tree this_dest = vec_dsts[i];
4a00c761
JJ
5033 enum tree_code c1 = code1, c2 = code2;
5034 if (i == 0 && codecvt2 != ERROR_MARK)
5035 {
5036 c1 = codecvt1;
5037 c2 = codecvt2;
5038 }
5039 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5040 &vec_oprnds1,
5041 stmt, this_dest, gsi,
5042 c1, c2, decl1, decl2,
5043 op_type);
5044 }
5045
9771b263 5046 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5047 {
5048 if (cvt_type)
5049 {
5050 if (codecvt1 == CALL_EXPR)
5051 {
5052 new_stmt = gimple_build_call (decl1, 1, vop0);
5053 new_temp = make_ssa_name (vec_dest, new_stmt);
5054 gimple_call_set_lhs (new_stmt, new_temp);
5055 }
5056 else
5057 {
5058 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5059 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5060 new_stmt = gimple_build_assign (new_temp, codecvt1,
5061 vop0);
4a00c761
JJ
5062 }
5063
5064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5065 }
5066 else
5067 new_stmt = SSA_NAME_DEF_STMT (vop0);
5068
5069 if (slp_node)
9771b263 5070 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 5071 else
c689ce1e
RB
5072 {
5073 if (!prev_stmt_info)
5074 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5075 else
5076 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5077 prev_stmt_info = vinfo_for_stmt (new_stmt);
5078 }
4a00c761 5079 }
ebfd146a 5080 }
4a00c761
JJ
5081
5082 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5083 break;
5084
5085 case NARROW:
5086 /* In case the vectorization factor (VF) is bigger than the number
5087 of elements that we can fit in a vectype (nunits), we have to
 5088 generate more than one vector stmt - i.e. - we need to "unroll"
5089 the vector stmt by a factor VF/nunits. */
5090 for (j = 0; j < ncopies; j++)
5091 {
5092 /* Handle uses. */
4a00c761
JJ
5093 if (slp_node)
5094 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5095 slp_node);
ebfd146a
IR
5096 else
5097 {
9771b263 5098 vec_oprnds0.truncate (0);
4a00c761
JJ
5099 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5100 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5101 }
5102
4a00c761
JJ
5103 /* Arguments are ready. Create the new vector stmts. */
5104 if (cvt_type)
9771b263 5105 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5106 {
5107 if (codecvt1 == CALL_EXPR)
5108 {
5109 new_stmt = gimple_build_call (decl1, 1, vop0);
5110 new_temp = make_ssa_name (vec_dest, new_stmt);
5111 gimple_call_set_lhs (new_stmt, new_temp);
5112 }
5113 else
5114 {
5115 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5116 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5117 new_stmt = gimple_build_assign (new_temp, codecvt1,
5118 vop0);
4a00c761 5119 }
ebfd146a 5120
4a00c761 5121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 5122 vec_oprnds0[i] = new_temp;
4a00c761 5123 }
ebfd146a 5124
4a00c761
JJ
5125 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5126 stmt, vec_dsts, gsi,
5127 slp_node, code1,
5128 &prev_stmt_info);
ebfd146a
IR
5129 }
5130
5131 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5132 break;
ebfd146a
IR
5133 }
5134
9771b263
DN
5135 vec_oprnds0.release ();
5136 vec_oprnds1.release ();
9771b263 5137 interm_types.release ();
ebfd146a
IR
5138
5139 return true;
5140}
ff802fa1
IR
5141
5142
ebfd146a
IR
5143/* Function vectorizable_assignment.
5144
b8698a0f
L
5145 Check if STMT performs an assignment (copy) that can be vectorized.
5146 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5147 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5148 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5149
5150static bool
355fe088 5151vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5152 gimple **vec_stmt, slp_tree slp_node,
5153 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5154{
5155 tree vec_dest;
5156 tree scalar_dest;
5157 tree op;
5158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5159 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5160 tree new_temp;
4fc5ebf1
JG
5161 enum vect_def_type dt[1] = {vect_unknown_def_type};
5162 int ndts = 1;
ebfd146a 5163 int ncopies;
f18b55bd 5164 int i, j;
6e1aa848 5165 vec<tree> vec_oprnds = vNULL;
ebfd146a 5166 tree vop;
a70d6342 5167 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5168 vec_info *vinfo = stmt_info->vinfo;
355fe088 5169 gimple *new_stmt = NULL;
f18b55bd 5170 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5171 enum tree_code code;
5172 tree vectype_in;
ebfd146a 5173
a70d6342 5174 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5175 return false;
5176
66c16fd9
RB
5177 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5178 && ! vec_stmt)
ebfd146a
IR
5179 return false;
5180
5181 /* Is vectorizable assignment? */
5182 if (!is_gimple_assign (stmt))
5183 return false;
5184
5185 scalar_dest = gimple_assign_lhs (stmt);
5186 if (TREE_CODE (scalar_dest) != SSA_NAME)
5187 return false;
5188
fde9c428 5189 code = gimple_assign_rhs_code (stmt);
ebfd146a 5190 if (gimple_assign_single_p (stmt)
fde9c428
RG
5191 || code == PAREN_EXPR
5192 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5193 op = gimple_assign_rhs1 (stmt);
5194 else
5195 return false;
5196
7b7ec6c5
RG
5197 if (code == VIEW_CONVERT_EXPR)
5198 op = TREE_OPERAND (op, 0);
5199
465c8c19 5200 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5201 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5202
5203 /* Multiple types in SLP are handled by creating the appropriate number of
5204 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5205 case of SLP. */
fce57248 5206 if (slp_node)
465c8c19
JJ
5207 ncopies = 1;
5208 else
e8f142e2 5209 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5210
5211 gcc_assert (ncopies >= 1);
5212
894dd753 5213 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
ebfd146a 5214 {
73fbfcad 5215 if (dump_enabled_p ())
78c60e3d 5216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5217 "use not simple.\n");
ebfd146a
IR
5218 return false;
5219 }
5220
fde9c428
RG
5221 /* We can handle NOP_EXPR conversions that do not change the number
5222 of elements or the vector size. */
7b7ec6c5
RG
5223 if ((CONVERT_EXPR_CODE_P (code)
5224 || code == VIEW_CONVERT_EXPR)
fde9c428 5225 && (!vectype_in
928686b1 5226 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5227 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5228 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5229 return false;
5230
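  /* Illustrative note (added comment): e.g. a cast between int and
     unsigned int, or a VIEW_CONVERT_EXPR between vectors that share the
     same mode, passes this check and becomes a plain vector copy.  */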
7b7b1813
RG
5231 /* We do not handle bit-precision changes. */
5232 if ((CONVERT_EXPR_CODE_P (code)
5233 || code == VIEW_CONVERT_EXPR)
5234 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5235 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5236 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5237 /* But a conversion that does not change the bit-pattern is ok. */
5238 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5239 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5240 && TYPE_UNSIGNED (TREE_TYPE (op)))
5241 /* Conversion between boolean types of different sizes is
5242 a simple assignment in case their vectypes are same
5243 boolean vectors. */
5244 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5245 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5246 {
73fbfcad 5247 if (dump_enabled_p ())
78c60e3d
SS
5248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5249 "type conversion to/from bit-precision "
e645e942 5250 "unsupported.\n");
7b7b1813
RG
5251 return false;
5252 }
5253
ebfd146a
IR
5254 if (!vec_stmt) /* transformation not required. */
5255 {
5256 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
adac3a68 5257 DUMP_VECT_SCOPE ("vectorizable_assignment");
68435eb2 5258 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5259 return true;
5260 }
5261
67b8dbac 5262 /* Transform. */
73fbfcad 5263 if (dump_enabled_p ())
e645e942 5264 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5265
5266 /* Handle def. */
5267 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5268
5269 /* Handle use. */
f18b55bd 5270 for (j = 0; j < ncopies; j++)
ebfd146a 5271 {
f18b55bd
IR
5272 /* Handle uses. */
5273 if (j == 0)
306b0c92 5274 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5275 else
5276 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5277
5278 /* Arguments are ready. create the new vector stmt. */
9771b263 5279 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5280 {
7b7ec6c5
RG
5281 if (CONVERT_EXPR_CODE_P (code)
5282 || code == VIEW_CONVERT_EXPR)
4a73490d 5283 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
5284 new_stmt = gimple_build_assign (vec_dest, vop);
5285 new_temp = make_ssa_name (vec_dest, new_stmt);
5286 gimple_assign_set_lhs (new_stmt, new_temp);
5287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5288 if (slp_node)
9771b263 5289 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 5290 }
ebfd146a
IR
5291
5292 if (slp_node)
f18b55bd
IR
5293 continue;
5294
5295 if (j == 0)
5296 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5297 else
5298 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5299
5300 prev_stmt_info = vinfo_for_stmt (new_stmt);
5301 }
b8698a0f 5302
9771b263 5303 vec_oprnds.release ();
ebfd146a
IR
5304 return true;
5305}
5306
9dc3f7de 5307
1107f3ae
IR
5308/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5309 either as shift by a scalar or by a vector. */
5310
5311bool
5312vect_supportable_shift (enum tree_code code, tree scalar_type)
5313{
5314
ef4bddc2 5315 machine_mode vec_mode;
1107f3ae
IR
5316 optab optab;
5317 int icode;
5318 tree vectype;
5319
5320 vectype = get_vectype_for_scalar_type (scalar_type);
5321 if (!vectype)
5322 return false;
5323
5324 optab = optab_for_tree_code (code, vectype, optab_scalar);
5325 if (!optab
5326 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5327 {
5328 optab = optab_for_tree_code (code, vectype, optab_vector);
5329 if (!optab
5330 || (optab_handler (optab, TYPE_MODE (vectype))
5331 == CODE_FOR_nothing))
5332 return false;
5333 }
5334
5335 vec_mode = TYPE_MODE (vectype);
5336 icode = (int) optab_handler (optab, vec_mode);
5337 if (icode == CODE_FOR_nothing)
5338 return false;
5339
5340 return true;
5341}
5342
5343
9dc3f7de
IR
5344/* Function vectorizable_shift.
5345
5346 Check if STMT performs a shift operation that can be vectorized.
5347 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5348 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5349 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5350
5351static bool
355fe088 5352vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5353 gimple **vec_stmt, slp_tree slp_node,
5354 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5355{
5356 tree vec_dest;
5357 tree scalar_dest;
5358 tree op0, op1 = NULL;
5359 tree vec_oprnd1 = NULL_TREE;
5360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5361 tree vectype;
5362 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5363 enum tree_code code;
ef4bddc2 5364 machine_mode vec_mode;
9dc3f7de
IR
5365 tree new_temp;
5366 optab optab;
5367 int icode;
ef4bddc2 5368 machine_mode optab_op2_mode;
9dc3f7de 5369 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5370 int ndts = 2;
355fe088 5371 gimple *new_stmt = NULL;
9dc3f7de 5372 stmt_vec_info prev_stmt_info;
928686b1
RS
5373 poly_uint64 nunits_in;
5374 poly_uint64 nunits_out;
9dc3f7de 5375 tree vectype_out;
cede2577 5376 tree op1_vectype;
9dc3f7de
IR
5377 int ncopies;
5378 int j, i;
6e1aa848
DN
5379 vec<tree> vec_oprnds0 = vNULL;
5380 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5381 tree vop0, vop1;
5382 unsigned int k;
49eab32e 5383 bool scalar_shift_arg = true;
9dc3f7de 5384 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5385 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5386
5387 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5388 return false;
5389
66c16fd9
RB
5390 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5391 && ! vec_stmt)
9dc3f7de
IR
5392 return false;
5393
5394 /* Is STMT a vectorizable binary/unary operation? */
5395 if (!is_gimple_assign (stmt))
5396 return false;
5397
5398 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5399 return false;
5400
5401 code = gimple_assign_rhs_code (stmt);
5402
5403 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5404 || code == RROTATE_EXPR))
5405 return false;
5406
5407 scalar_dest = gimple_assign_lhs (stmt);
5408 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5409 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5410 {
73fbfcad 5411 if (dump_enabled_p ())
78c60e3d 5412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5413 "bit-precision shifts not supported.\n");
7b7b1813
RG
5414 return false;
5415 }
9dc3f7de
IR
5416
5417 op0 = gimple_assign_rhs1 (stmt);
894dd753 5418 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
9dc3f7de 5419 {
73fbfcad 5420 if (dump_enabled_p ())
78c60e3d 5421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5422 "use not simple.\n");
9dc3f7de
IR
5423 return false;
5424 }
5425 /* If op0 is an external or constant def use a vector type with
5426 the same size as the output vector type. */
5427 if (!vectype)
5428 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5429 if (vec_stmt)
5430 gcc_assert (vectype);
5431 if (!vectype)
5432 {
73fbfcad 5433 if (dump_enabled_p ())
78c60e3d 5434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5435 "no vectype for scalar type\n");
9dc3f7de
IR
5436 return false;
5437 }
5438
5439 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5440 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5441 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5442 return false;
5443
5444 op1 = gimple_assign_rhs2 (stmt);
894dd753 5445 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype))
9dc3f7de 5446 {
73fbfcad 5447 if (dump_enabled_p ())
78c60e3d 5448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5449 "use not simple.\n");
9dc3f7de
IR
5450 return false;
5451 }
5452
9dc3f7de
IR
5453 /* Multiple types in SLP are handled by creating the appropriate number of
5454 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5455 case of SLP. */
fce57248 5456 if (slp_node)
9dc3f7de
IR
5457 ncopies = 1;
5458 else
e8f142e2 5459 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5460
5461 gcc_assert (ncopies >= 1);
5462
5463 /* Determine whether the shift amount is a vector, or scalar. If the
5464 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5465
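  /* Illustrative example (added comment): in "a[i] = b[i] << 2" the shift
     amount is invariant and a vector/scalar shift can be used, whereas
     "a[i] = b[i] << c[i]" needs a vector/vector shift.  */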
dbfa87aa
YR
5466 if ((dt[1] == vect_internal_def
5467 || dt[1] == vect_induction_def)
5468 && !slp_node)
49eab32e
JJ
5469 scalar_shift_arg = false;
5470 else if (dt[1] == vect_constant_def
5471 || dt[1] == vect_external_def
5472 || dt[1] == vect_internal_def)
5473 {
5474 /* In SLP, need to check whether the shift count is the same,
5475 in loops if it is a constant or invariant, it is always
5476 a scalar shift. */
5477 if (slp_node)
5478 {
355fe088
TS
5479 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5480 gimple *slpstmt;
49eab32e 5481
9771b263 5482 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5483 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5484 scalar_shift_arg = false;
5485 }
60d393e8
RB
5486
5487 /* If the shift amount is computed by a pattern stmt we cannot
5488 use the scalar amount directly thus give up and use a vector
5489 shift. */
5490 if (dt[1] == vect_internal_def)
5491 {
5492 gimple *def = SSA_NAME_DEF_STMT (op1);
5493 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5494 scalar_shift_arg = false;
5495 }
49eab32e
JJ
5496 }
5497 else
5498 {
73fbfcad 5499 if (dump_enabled_p ())
78c60e3d 5500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5501 "operand mode requires invariant argument.\n");
49eab32e
JJ
5502 return false;
5503 }
5504
9dc3f7de 5505 /* Vector shifted by vector. */
49eab32e 5506 if (!scalar_shift_arg)
9dc3f7de
IR
5507 {
5508 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5509 if (dump_enabled_p ())
78c60e3d 5510 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5511 "vector/vector shift/rotate found.\n");
78c60e3d 5512
aa948027
JJ
5513 if (!op1_vectype)
5514 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5515 if (op1_vectype == NULL_TREE
5516 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5517 {
73fbfcad 5518 if (dump_enabled_p ())
78c60e3d
SS
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520 "unusable type for last operand in"
e645e942 5521 " vector/vector shift/rotate.\n");
cede2577
JJ
5522 return false;
5523 }
9dc3f7de
IR
5524 }
5525 /* See if the machine has a vector shifted by scalar insn and if not
5526 then see if it has a vector shifted by vector insn. */
49eab32e 5527 else
9dc3f7de
IR
5528 {
5529 optab = optab_for_tree_code (code, vectype, optab_scalar);
5530 if (optab
5531 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5532 {
73fbfcad 5533 if (dump_enabled_p ())
78c60e3d 5534 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5535 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5536 }
5537 else
5538 {
5539 optab = optab_for_tree_code (code, vectype, optab_vector);
5540 if (optab
5541 && (optab_handler (optab, TYPE_MODE (vectype))
5542 != CODE_FOR_nothing))
5543 {
49eab32e
JJ
5544 scalar_shift_arg = false;
5545
73fbfcad 5546 if (dump_enabled_p ())
78c60e3d 5547 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5548 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5549
5550 /* Unlike the other binary operators, shifts/rotates have
5551 the rhs being int, instead of the same type as the lhs,
5552 so make sure the scalar is the right type if we are
aa948027 5553 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5554 if (dt[1] == vect_constant_def)
5555 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5556 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5557 TREE_TYPE (op1)))
5558 {
5559 if (slp_node
5560 && TYPE_MODE (TREE_TYPE (vectype))
5561 != TYPE_MODE (TREE_TYPE (op1)))
5562 {
73fbfcad 5563 if (dump_enabled_p ())
78c60e3d
SS
5564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5565 "unusable type for last operand in"
e645e942 5566 " vector/vector shift/rotate.\n");
21c0a521 5567 return false;
aa948027
JJ
5568 }
5569 if (vec_stmt && !slp_node)
5570 {
5571 op1 = fold_convert (TREE_TYPE (vectype), op1);
5572 op1 = vect_init_vector (stmt, op1,
5573 TREE_TYPE (vectype), NULL);
5574 }
5575 }
9dc3f7de
IR
5576 }
5577 }
5578 }
9dc3f7de
IR
5579
5580 /* Supportable by target? */
5581 if (!optab)
5582 {
73fbfcad 5583 if (dump_enabled_p ())
78c60e3d 5584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5585 "no optab.\n");
9dc3f7de
IR
5586 return false;
5587 }
5588 vec_mode = TYPE_MODE (vectype);
5589 icode = (int) optab_handler (optab, vec_mode);
5590 if (icode == CODE_FOR_nothing)
5591 {
73fbfcad 5592 if (dump_enabled_p ())
78c60e3d 5593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5594 "op not supported by target.\n");
9dc3f7de 5595 /* Check only during analysis. */
cf098191 5596 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5597 || (!vec_stmt
5598 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5599 return false;
73fbfcad 5600 if (dump_enabled_p ())
e645e942
TJ
5601 dump_printf_loc (MSG_NOTE, vect_location,
5602 "proceeding using word mode.\n");
9dc3f7de
IR
5603 }
5604
5605 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5606 if (!vec_stmt
5607 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5608 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5609 {
73fbfcad 5610 if (dump_enabled_p ())
78c60e3d 5611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5612 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5613 return false;
5614 }
5615
5616 if (!vec_stmt) /* transformation not required. */
5617 {
5618 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
adac3a68 5619 DUMP_VECT_SCOPE ("vectorizable_shift");
68435eb2 5620 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5621 return true;
5622 }
5623
67b8dbac 5624 /* Transform. */
9dc3f7de 5625
73fbfcad 5626 if (dump_enabled_p ())
78c60e3d 5627 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5628 "transform binary/unary operation.\n");
9dc3f7de
IR
5629
5630 /* Handle def. */
5631 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5632
9dc3f7de
IR
5633 prev_stmt_info = NULL;
5634 for (j = 0; j < ncopies; j++)
5635 {
5636 /* Handle uses. */
5637 if (j == 0)
5638 {
5639 if (scalar_shift_arg)
5640 {
5641 /* Vector shl and shr insn patterns can be defined with scalar
5642 operand 2 (shift operand). In this case, use constant or loop
5643 invariant op1 directly, without extending it to vector mode
5644 first. */
5645 optab_op2_mode = insn_data[icode].operand[2].mode;
5646 if (!VECTOR_MODE_P (optab_op2_mode))
5647 {
73fbfcad 5648 if (dump_enabled_p ())
78c60e3d 5649 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5650 "operand 1 using scalar mode.\n");
9dc3f7de 5651 vec_oprnd1 = op1;
8930f723 5652 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5653 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5654 if (slp_node)
5655 {
5656 /* Store vec_oprnd1 for every vector stmt to be created
5657 for SLP_NODE. We check during the analysis that all
5658 the shift arguments are the same.
5659 TODO: Allow different constants for different vector
5660 stmts generated for an SLP instance. */
5661 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5662 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5663 }
5664 }
5665 }
5666
5667 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5668 (a special case for certain kind of vector shifts); otherwise,
5669 operand 1 should be of a vector type (the usual case). */
5670 if (vec_oprnd1)
5671 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5672 slp_node);
9dc3f7de
IR
5673 else
5674 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5675 slp_node);
9dc3f7de
IR
5676 }
5677 else
5678 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5679
5680 /* Arguments are ready. Create the new vector stmt. */
9771b263 5681 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5682 {
9771b263 5683 vop1 = vec_oprnds1[i];
0d0e4a03 5684 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5685 new_temp = make_ssa_name (vec_dest, new_stmt);
5686 gimple_assign_set_lhs (new_stmt, new_temp);
5687 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5688 if (slp_node)
9771b263 5689 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5690 }
5691
5692 if (slp_node)
5693 continue;
5694
5695 if (j == 0)
5696 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5697 else
5698 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5699 prev_stmt_info = vinfo_for_stmt (new_stmt);
5700 }
5701
9771b263
DN
5702 vec_oprnds0.release ();
5703 vec_oprnds1.release ();
9dc3f7de
IR
5704
5705 return true;
5706}
5707
5708
ebfd146a
IR
5709/* Function vectorizable_operation.
5710
16949072
RG
5711 Check if STMT performs a binary, unary or ternary operation that can
5712 be vectorized.
b8698a0f 5713 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5714 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5715 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5716
5717static bool
355fe088 5718vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
5719 gimple **vec_stmt, slp_tree slp_node,
5720 stmt_vector_for_cost *cost_vec)
ebfd146a 5721{
00f07b86 5722 tree vec_dest;
ebfd146a 5723 tree scalar_dest;
16949072 5724 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5726 tree vectype;
ebfd146a 5727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5728 enum tree_code code, orig_code;
ef4bddc2 5729 machine_mode vec_mode;
ebfd146a
IR
5730 tree new_temp;
5731 int op_type;
00f07b86 5732 optab optab;
523ba738 5733 bool target_support_p;
16949072
RG
5734 enum vect_def_type dt[3]
5735 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5736 int ndts = 3;
355fe088 5737 gimple *new_stmt = NULL;
ebfd146a 5738 stmt_vec_info prev_stmt_info;
928686b1
RS
5739 poly_uint64 nunits_in;
5740 poly_uint64 nunits_out;
ebfd146a
IR
5741 tree vectype_out;
5742 int ncopies;
5743 int j, i;
6e1aa848
DN
5744 vec<tree> vec_oprnds0 = vNULL;
5745 vec<tree> vec_oprnds1 = vNULL;
5746 vec<tree> vec_oprnds2 = vNULL;
16949072 5747 tree vop0, vop1, vop2;
a70d6342 5748 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5749 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5750
a70d6342 5751 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5752 return false;
5753
66c16fd9
RB
5754 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5755 && ! vec_stmt)
ebfd146a
IR
5756 return false;
5757
5758 /* Is STMT a vectorizable binary/unary operation? */
5759 if (!is_gimple_assign (stmt))
5760 return false;
5761
5762 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5763 return false;
5764
0eb952ea 5765 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5766
1af4ebf5
MG
5767 /* For pointer addition and subtraction, we should use the normal
5768 plus and minus for the vector operation. */
ebfd146a
IR
5769 if (code == POINTER_PLUS_EXPR)
5770 code = PLUS_EXPR;
1af4ebf5
MG
5771 if (code == POINTER_DIFF_EXPR)
5772 code = MINUS_EXPR;
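  /* Illustrative note (added comment): e.g. a pointer increment such as
     "q_2 = p_1 + 4" is vectorized as an ordinary element-wise PLUS_EXPR,
     and a pointer difference as a MINUS_EXPR.  */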
ebfd146a
IR
5773
 5774 /* Support only unary, binary or ternary operations. */
5775 op_type = TREE_CODE_LENGTH (code);
16949072 5776 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5777 {
73fbfcad 5778 if (dump_enabled_p ())
78c60e3d 5779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5780 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5781 op_type);
ebfd146a
IR
5782 return false;
5783 }
5784
b690cc0f
RG
5785 scalar_dest = gimple_assign_lhs (stmt);
5786 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5787
7b7b1813
RG
5788 /* Most operations cannot handle bit-precision types without extra
5789 truncations. */
045c1278 5790 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5791 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
 5792 /* Exceptions are bitwise binary operations. */
5793 && code != BIT_IOR_EXPR
5794 && code != BIT_XOR_EXPR
5795 && code != BIT_AND_EXPR)
5796 {
73fbfcad 5797 if (dump_enabled_p ())
78c60e3d 5798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5799 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5800 return false;
5801 }
5802
ebfd146a 5803 op0 = gimple_assign_rhs1 (stmt);
894dd753 5804 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
ebfd146a 5805 {
73fbfcad 5806 if (dump_enabled_p ())
78c60e3d 5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5808 "use not simple.\n");
ebfd146a
IR
5809 return false;
5810 }
b690cc0f
RG
5811 /* If op0 is an external or constant def use a vector type with
5812 the same size as the output vector type. */
5813 if (!vectype)
b036c6c5
IE
5814 {
 5815 /* For a boolean type we cannot determine the vectype from an
 5816 invariant value (we don't know whether it is a vector
 5817 of booleans or a vector of integers). We use the output
 5818 vectype because operations on booleans don't change the
 5819 type. */
2568d8a1 5820 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5821 {
2568d8a1 5822 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5823 {
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5826 "not supported operation on bool value.\n");
5827 return false;
5828 }
5829 vectype = vectype_out;
5830 }
5831 else
5832 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5833 }
7d8930a0
IR
5834 if (vec_stmt)
5835 gcc_assert (vectype);
5836 if (!vectype)
5837 {
73fbfcad 5838 if (dump_enabled_p ())
7d8930a0 5839 {
78c60e3d
SS
5840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5841 "no vectype for scalar type ");
5842 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5843 TREE_TYPE (op0));
e645e942 5844 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5845 }
5846
5847 return false;
5848 }
b690cc0f
RG
5849
5850 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5851 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5852 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5853 return false;
ebfd146a 5854
16949072 5855 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5856 {
5857 op1 = gimple_assign_rhs2 (stmt);
894dd753 5858 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
ebfd146a 5859 {
73fbfcad 5860 if (dump_enabled_p ())
78c60e3d 5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5862 "use not simple.\n");
ebfd146a
IR
5863 return false;
5864 }
5865 }
16949072
RG
5866 if (op_type == ternary_op)
5867 {
5868 op2 = gimple_assign_rhs3 (stmt);
894dd753 5869 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
16949072 5870 {
73fbfcad 5871 if (dump_enabled_p ())
78c60e3d 5872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5873 "use not simple.\n");
16949072
RG
5874 return false;
5875 }
5876 }
ebfd146a 5877
b690cc0f 5878 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5879 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5880 case of SLP. */
fce57248 5881 if (slp_node)
b690cc0f
RG
5882 ncopies = 1;
5883 else
e8f142e2 5884 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5885
5886 gcc_assert (ncopies >= 1);
5887
9dc3f7de 5888 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5889 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5890 || code == RROTATE_EXPR)
9dc3f7de 5891 return false;
ebfd146a 5892
ebfd146a 5893 /* Supportable by target? */
00f07b86
RH
5894
5895 vec_mode = TYPE_MODE (vectype);
5896 if (code == MULT_HIGHPART_EXPR)
523ba738 5897 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5898 else
5899 {
5900 optab = optab_for_tree_code (code, vectype, optab_default);
5901 if (!optab)
5deb57cb 5902 {
73fbfcad 5903 if (dump_enabled_p ())
78c60e3d 5904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5905 "no optab.\n");
00f07b86 5906 return false;
5deb57cb 5907 }
523ba738
RS
5908 target_support_p = (optab_handler (optab, vec_mode)
5909 != CODE_FOR_nothing);
5deb57cb
JJ
5910 }
5911
523ba738 5912 if (!target_support_p)
ebfd146a 5913 {
73fbfcad 5914 if (dump_enabled_p ())
78c60e3d 5915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5916 "op not supported by target.\n");
ebfd146a 5917 /* Check only during analysis. */
cf098191 5918 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5919 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5920 return false;
73fbfcad 5921 if (dump_enabled_p ())
e645e942
TJ
5922 dump_printf_loc (MSG_NOTE, vect_location,
5923 "proceeding using word mode.\n");
383d9c83
IR
5924 }
5925
4a00c761 5926 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5927 if (!VECTOR_MODE_P (vec_mode)
5928 && !vec_stmt
ca09abcb 5929 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5930 {
73fbfcad 5931 if (dump_enabled_p ())
78c60e3d 5932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5933 "not worthwhile without SIMD support.\n");
e34842c6 5934 return false;
7d8930a0 5935 }
ebfd146a 5936
ebfd146a
IR
5937 if (!vec_stmt) /* transformation not required. */
5938 {
4a00c761 5939 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
adac3a68 5940 DUMP_VECT_SCOPE ("vectorizable_operation");
68435eb2 5941 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5942 return true;
5943 }
5944
67b8dbac 5945 /* Transform. */
ebfd146a 5946
73fbfcad 5947 if (dump_enabled_p ())
78c60e3d 5948 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5949 "transform binary/unary operation.\n");
383d9c83 5950
0eb952ea
JJ
5951 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5952 vectors with unsigned elements, but the result is signed. So, we
5953 need to compute the MINUS_EXPR into vectype temporary and
5954 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5955 tree vec_cvt_dest = NULL_TREE;
5956 if (orig_code == POINTER_DIFF_EXPR)
7b76867b
RB
5957 {
5958 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5959 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5960 }
5961 /* Handle def. */
5962 else
5963 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
0eb952ea 5964
ebfd146a
IR
5965 /* In case the vectorization factor (VF) is bigger than the number
5966 of elements that we can fit in a vectype (nunits), we have to generate
 5967 more than one vector stmt - i.e. - we need to "unroll" the
4a00c761
JJ
5968 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5969 from one copy of the vector stmt to the next, in the field
5970 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5971 stages to find the correct vector defs to be used when vectorizing
5972 stmts that use the defs of the current stmt. The example below
5973 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5974 we need to create 4 vectorized stmts):
5975
5976 before vectorization:
5977 RELATED_STMT VEC_STMT
5978 S1: x = memref - -
5979 S2: z = x + 1 - -
5980
5981 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5982 there):
5983 RELATED_STMT VEC_STMT
5984 VS1_0: vx0 = memref0 VS1_1 -
5985 VS1_1: vx1 = memref1 VS1_2 -
5986 VS1_2: vx2 = memref2 VS1_3 -
5987 VS1_3: vx3 = memref3 - -
5988 S1: x = load - VS1_0
5989 S2: z = x + 1 - -
5990
5991 step2: vectorize stmt S2 (done here):
5992 To vectorize stmt S2 we first need to find the relevant vector
5993 def for the first operand 'x'. This is, as usual, obtained from
5994 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5995 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5996 relevant vector def 'vx0'. Having found 'vx0' we can generate
5997 the vector stmt VS2_0, and as usual, record it in the
5998 STMT_VINFO_VEC_STMT of stmt S2.
5999 When creating the second copy (VS2_1), we obtain the relevant vector
6000 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6001 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6002 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6003 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6004 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6005 chain of stmts and pointers:
6006 RELATED_STMT VEC_STMT
6007 VS1_0: vx0 = memref0 VS1_1 -
6008 VS1_1: vx1 = memref1 VS1_2 -
6009 VS1_2: vx2 = memref2 VS1_3 -
6010 VS1_3: vx3 = memref3 - -
6011 S1: x = load - VS1_0
6012 VS2_0: vz0 = vx0 + v1 VS2_1 -
6013 VS2_1: vz1 = vx1 + v1 VS2_2 -
6014 VS2_2: vz2 = vx2 + v1 VS2_3 -
6015 VS2_3: vz3 = vx3 + v1 - -
6016 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6017
6018 prev_stmt_info = NULL;
6019 for (j = 0; j < ncopies; j++)
6020 {
6021 /* Handle uses. */
6022 if (j == 0)
4a00c761 6023 {
d6476f90 6024 if (op_type == binary_op)
4a00c761 6025 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 6026 slp_node);
d6476f90
RB
6027 else if (op_type == ternary_op)
6028 {
6029 if (slp_node)
6030 {
6031 auto_vec<tree> ops(3);
6032 ops.quick_push (op0);
6033 ops.quick_push (op1);
6034 ops.quick_push (op2);
6035 auto_vec<vec<tree> > vec_defs(3);
6036 vect_get_slp_defs (ops, slp_node, &vec_defs);
6037 vec_oprnds0 = vec_defs[0];
6038 vec_oprnds1 = vec_defs[1];
6039 vec_oprnds2 = vec_defs[2];
6040 }
6041 else
6042 {
6043 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6044 NULL);
6045 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6046 NULL);
6047 }
6048 }
4a00c761
JJ
6049 else
6050 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 6051 slp_node);
4a00c761 6052 }
ebfd146a 6053 else
4a00c761
JJ
6054 {
6055 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6056 if (op_type == ternary_op)
6057 {
9771b263
DN
6058 tree vec_oprnd = vec_oprnds2.pop ();
6059 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6060 vec_oprnd));
4a00c761
JJ
6061 }
6062 }
6063
6064 /* Arguments are ready. Create the new vector stmt. */
9771b263 6065 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6066 {
4a00c761 6067 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6068 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6069 vop2 = ((op_type == ternary_op)
9771b263 6070 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 6071 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
6072 new_temp = make_ssa_name (vec_dest, new_stmt);
6073 gimple_assign_set_lhs (new_stmt, new_temp);
6074 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
6075 if (vec_cvt_dest)
6076 {
6077 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6078 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6079 new_temp);
6080 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6081 gimple_assign_set_lhs (new_stmt, new_temp);
6082 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6083 }
4a00c761 6084 if (slp_node)
9771b263 6085 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
6086 }
6087
4a00c761
JJ
6088 if (slp_node)
6089 continue;
6090
6091 if (j == 0)
6092 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6093 else
6094 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6095 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
6096 }
6097
9771b263
DN
6098 vec_oprnds0.release ();
6099 vec_oprnds1.release ();
6100 vec_oprnds2.release ();
ebfd146a 6101
ebfd146a
IR
6102 return true;
6103}
6104
f702e7d4 6105/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
6106
6107static void
f702e7d4 6108ensure_base_align (struct data_reference *dr)
c716e67f 6109{
ca823c85 6110 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
c716e67f
XDL
6111 return;
6112
52639a61 6113 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 6114 {
52639a61 6115 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 6116
f702e7d4
RS
6117 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6118
428f0c67 6119 if (decl_in_symtab_p (base_decl))
f702e7d4 6120 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6121 else
6122 {
f702e7d4 6123 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6124 DECL_USER_ALIGN (base_decl) = 1;
6125 }
52639a61 6126 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6127 }
6128}
6129
ebfd146a 6130
44fc7854
BE
6131/* Function get_group_alias_ptr_type.
6132
6133 Return the alias type for the group starting at FIRST_STMT. */
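/* Added note (not in the original source): if the group members do not
   agree on their alias set, ptr_type_node is returned below so that the
   access is conservatively treated as aliasing anything.  */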
6134
6135static tree
6136get_group_alias_ptr_type (gimple *first_stmt)
6137{
6138 struct data_reference *first_dr, *next_dr;
6139 gimple *next_stmt;
6140
6141 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6142 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
44fc7854
BE
6143 while (next_stmt)
6144 {
6145 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6146 if (get_alias_set (DR_REF (first_dr))
6147 != get_alias_set (DR_REF (next_dr)))
6148 {
6149 if (dump_enabled_p ())
6150 dump_printf_loc (MSG_NOTE, vect_location,
6151 "conflicting alias set types.\n");
6152 return ptr_type_node;
6153 }
2c53b149 6154 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
44fc7854
BE
6155 }
6156 return reference_alias_ptr_type (DR_REF (first_dr));
6157}
6158
6159
ebfd146a
IR
6160/* Function vectorizable_store.
6161
b8698a0f
L
 6162 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6163 can be vectorized.
6164 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6165 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6166 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6167
6168static bool
355fe088 6169vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2 6170 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a 6171{
ebfd146a
IR
6172 tree data_ref;
6173 tree op;
6174 tree vec_oprnd = NULL_TREE;
6175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6176 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6177 tree elem_type;
ebfd146a 6178 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6179 struct loop *loop = NULL;
ef4bddc2 6180 machine_mode vec_mode;
ebfd146a
IR
6181 tree dummy;
6182 enum dr_alignment_support alignment_support_scheme;
929b4411
RS
6183 enum vect_def_type rhs_dt = vect_unknown_def_type;
6184 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6185 stmt_vec_info prev_stmt_info = NULL;
6186 tree dataref_ptr = NULL_TREE;
74bf76ed 6187 tree dataref_offset = NULL_TREE;
355fe088 6188 gimple *ptr_incr = NULL;
ebfd146a
IR
6189 int ncopies;
6190 int j;
2de001ee
RS
6191 gimple *next_stmt, *first_stmt;
6192 bool grouped_store;
ebfd146a 6193 unsigned int group_size, i;
6e1aa848
DN
6194 vec<tree> oprnds = vNULL;
6195 vec<tree> result_chain = vNULL;
ebfd146a 6196 bool inv_p;
09dfa495 6197 tree offset = NULL_TREE;
6e1aa848 6198 vec<tree> vec_oprnds = vNULL;
ebfd146a 6199 bool slp = (slp_node != NULL);
ebfd146a 6200 unsigned int vec_num;
a70d6342 6201 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6202 vec_info *vinfo = stmt_info->vinfo;
272c6793 6203 tree aggr_type;
134c85ca 6204 gather_scatter_info gs_info;
355fe088 6205 gimple *new_stmt;
d9f21f6a 6206 poly_uint64 vf;
2de001ee 6207 vec_load_store_type vls_type;
44fc7854 6208 tree ref_type;
a70d6342 6209
a70d6342 6210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6211 return false;
6212
66c16fd9
RB
6213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6214 && ! vec_stmt)
ebfd146a
IR
6215 return false;
6216
6217 /* Is vectorizable store? */
6218
c3a8f964
RS
6219 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6220 if (is_gimple_assign (stmt))
6221 {
6222 tree scalar_dest = gimple_assign_lhs (stmt);
6223 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6224 && is_pattern_stmt_p (stmt_info))
6225 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6226 if (TREE_CODE (scalar_dest) != ARRAY_REF
6227 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6228 && TREE_CODE (scalar_dest) != INDIRECT_REF
6229 && TREE_CODE (scalar_dest) != COMPONENT_REF
6230 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6231 && TREE_CODE (scalar_dest) != REALPART_EXPR
6232 && TREE_CODE (scalar_dest) != MEM_REF)
6233 return false;
6234 }
6235 else
6236 {
6237 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6238 if (!call || !gimple_call_internal_p (call))
6239 return false;
6240
6241 internal_fn ifn = gimple_call_internal_fn (call);
6242 if (!internal_store_fn_p (ifn))
c3a8f964 6243 return false;
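      /* Illustrative note (added comment): internal store functions
	 accepted here include e.g. IFN_MASK_STORE and IFN_SCATTER_STORE.  */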
ebfd146a 6244
c3a8f964
RS
6245 if (slp_node != NULL)
6246 {
6247 if (dump_enabled_p ())
6248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6249 "SLP of masked stores not supported.\n");
6250 return false;
6251 }
6252
f307441a
RS
6253 int mask_index = internal_fn_mask_index (ifn);
6254 if (mask_index >= 0)
6255 {
6256 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6257 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6258 &mask_vectype))
f307441a
RS
6259 return false;
6260 }
c3a8f964
RS
6261 }
6262
6263 op = vect_get_store_rhs (stmt);
ebfd146a 6264
fce57248
RS
6265 /* Cannot have hybrid store SLP -- that would mean storing to the
6266 same location twice. */
6267 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6268
f4d09712 6269 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6270 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6271
6272 if (loop_vinfo)
b17dc4d4
RB
6273 {
6274 loop = LOOP_VINFO_LOOP (loop_vinfo);
6275 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6276 }
6277 else
6278 vf = 1;
465c8c19
JJ
6279
6280 /* Multiple types in SLP are handled by creating the appropriate number of
6281 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6282 case of SLP. */
fce57248 6283 if (slp)
465c8c19
JJ
6284 ncopies = 1;
6285 else
e8f142e2 6286 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6287
6288 gcc_assert (ncopies >= 1);
6289
6290 /* FORNOW. This restriction should be relaxed. */
6291 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6292 {
6293 if (dump_enabled_p ())
6294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6295 "multiple types in nested loop.\n");
6296 return false;
6297 }
6298
929b4411 6299 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6300 return false;
6301
272c6793 6302 elem_type = TREE_TYPE (vectype);
ebfd146a 6303 vec_mode = TYPE_MODE (vectype);
7b7b1813 6304
ebfd146a
IR
6305 if (!STMT_VINFO_DATA_REF (stmt_info))
6306 return false;
6307
2de001ee 6308 vect_memory_access_type memory_access_type;
7e11fc7f 6309 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6310 &memory_access_type, &gs_info))
6311 return false;
3bab6342 6312
c3a8f964
RS
6313 if (mask)
6314 {
7e11fc7f
RS
6315 if (memory_access_type == VMAT_CONTIGUOUS)
6316 {
6317 if (!VECTOR_MODE_P (vec_mode)
6318 || !can_vec_mask_load_store_p (vec_mode,
6319 TYPE_MODE (mask_vectype), false))
6320 return false;
6321 }
f307441a
RS
6322 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6323 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6324 {
6325 if (dump_enabled_p ())
6326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6327 "unsupported access type for masked store.\n");
6328 return false;
6329 }
c3a8f964
RS
6330 }
6331 else
6332 {
6333 /* FORNOW. In some cases can vectorize even if data-type not supported
6334	 (e.g. array initialization with 0).  */
6335 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6336 return false;
6337 }
6338
f307441a 6339 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6340 && memory_access_type != VMAT_GATHER_SCATTER
6341 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6342 if (grouped_store)
6343 {
2c53b149 6344 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7cfb4d93 6345 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2c53b149 6346 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7cfb4d93
RS
6347 }
6348 else
6349 {
6350 first_stmt = stmt;
6351 first_dr = dr;
6352 group_size = vec_num = 1;
6353 }
6354
ebfd146a
IR
6355 if (!vec_stmt) /* transformation not required. */
6356 {
2de001ee 6357 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6358
6359 if (loop_vinfo
6360 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6361 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6362 memory_access_type, &gs_info);
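	/* A sketch of the call above: if the loop may be fully masked, it
	   records the masks this store would need; when the access cannot
	   be masked at all it clears LOOP_VINFO_CAN_FULLY_MASK_P instead.  */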
7cfb4d93 6363
ebfd146a 6364 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6365 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6366 vls_type, slp_node, cost_vec);
ebfd146a
IR
6367 return true;
6368 }
2de001ee 6369 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6370
67b8dbac 6371 /* Transform. */
ebfd146a 6372
f702e7d4 6373 ensure_base_align (dr);
c716e67f 6374
f307441a 6375 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6376 {
c3a8f964 6377 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6378 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6379 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6380 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6381 edge pe = loop_preheader_edge (loop);
6382 gimple_seq seq;
6383 basic_block new_bb;
6384 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6385 poly_uint64 scatter_off_nunits
6386 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6387
4d694b27 6388 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6389 modifier = NONE;
4d694b27 6390 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6391 {
3bab6342
AT
6392 modifier = WIDEN;
6393
4d694b27
RS
6394 /* Currently gathers and scatters are only supported for
6395 fixed-length vectors. */
6396 unsigned int count = scatter_off_nunits.to_constant ();
6397 vec_perm_builder sel (count, count, 1);
6398 for (i = 0; i < (unsigned int) count; ++i)
6399 sel.quick_push (i | (count / 2));
3bab6342 6400
4d694b27 6401 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6402 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6403 indices);
3bab6342
AT
6404 gcc_assert (perm_mask != NULL_TREE);
6405 }
4d694b27 6406 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6407 {
3bab6342
AT
6408 modifier = NARROW;
6409
4d694b27
RS
6410 /* Currently gathers and scatters are only supported for
6411 fixed-length vectors. */
6412 unsigned int count = nunits.to_constant ();
6413 vec_perm_builder sel (count, count, 1);
6414 for (i = 0; i < (unsigned int) count; ++i)
6415 sel.quick_push (i | (count / 2));
3bab6342 6416
4d694b27 6417 vec_perm_indices indices (sel, 2, count);
e3342de4 6418 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6419 gcc_assert (perm_mask != NULL_TREE);
6420 ncopies *= 2;
6421 }
6422 else
6423 gcc_unreachable ();
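      /* A sketch of the three cases above: with NONE each copy consumes one
	 data and one offset vector.  With WIDEN the offset vector has twice
	 as many lanes as the data vector, so one offset vector feeds two
	 copies (PERM_MASK selects its high half for the odd copy).  With
	 NARROW the data vector has twice as many lanes, so one data vector
	 feeds two copies (again via PERM_MASK) and NCOPIES is doubled.  */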
6424
134c85ca 6425 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6426 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6427 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6428 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6429 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6430 scaletype = TREE_VALUE (arglist);
6431
6432 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6433 && TREE_CODE (rettype) == VOID_TYPE);
6434
134c85ca 6435 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6436 if (!is_gimple_min_invariant (ptr))
6437 {
6438 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6439 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6440 gcc_assert (!new_bb);
6441 }
6442
6443 /* Currently we support only unconditional scatter stores,
6444 so mask should be all ones. */
6445 mask = build_int_cst (masktype, -1);
6446 mask = vect_init_vector (stmt, mask, masktype, NULL);
6447
134c85ca 6448 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6449
6450 prev_stmt_info = NULL;
6451 for (j = 0; j < ncopies; ++j)
6452 {
6453 if (j == 0)
6454 {
6455 src = vec_oprnd1
c3a8f964 6456 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6457 op = vec_oprnd0
134c85ca 6458 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6459 }
6460 else if (modifier != NONE && (j & 1))
6461 {
6462 if (modifier == WIDEN)
6463 {
6464 src = vec_oprnd1
929b4411 6465 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6466 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6467 stmt, gsi);
6468 }
6469 else if (modifier == NARROW)
6470 {
6471 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6472 stmt, gsi);
6473 op = vec_oprnd0
134c85ca
RS
6474 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6475 vec_oprnd0);
3bab6342
AT
6476 }
6477 else
6478 gcc_unreachable ();
6479 }
6480 else
6481 {
6482 src = vec_oprnd1
929b4411 6483 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6484 op = vec_oprnd0
134c85ca
RS
6485 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6486 vec_oprnd0);
3bab6342
AT
6487 }
6488
6489 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6490 {
928686b1
RS
6491 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6492 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6493 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6494 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6495 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6496 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6497 src = var;
6498 }
6499
6500 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6501 {
928686b1
RS
6502 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6503 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6504 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6505 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6506 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6508 op = var;
6509 }
6510
6511 new_stmt
134c85ca 6512 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6513
6514 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6515
6516 if (prev_stmt_info == NULL)
6517 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6518 else
6519 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6520 prev_stmt_info = vinfo_for_stmt (new_stmt);
6521 }
6522 return true;
6523 }
6524
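  /* For an interleaved store, count how many members of the group have been
     seen so far; the vectorized stores below are only emitted once the last
     member of the group is reached.  */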
f307441a 6525 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6526 {
2c53b149
RB
6527 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6528 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
f307441a 6529 }
ebfd146a 6530
f307441a
RS
6531 if (grouped_store)
6532 {
ebfd146a 6533 /* FORNOW */
a70d6342 6534 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6535
6536 /* We vectorize all the stmts of the interleaving group when we
6537 reach the last stmt in the group. */
2c53b149
RB
6538 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6539 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6540 && !slp)
6541 {
6542 *vec_stmt = NULL;
6543 return true;
6544 }
6545
6546 if (slp)
4b5caab7 6547 {
0d0293ac 6548 grouped_store = false;
4b5caab7
IR
6549 /* VEC_NUM is the number of vect stmts to be created for this
6550 group. */
6551 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6552 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2c53b149 6553 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6554 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6555 op = vect_get_store_rhs (first_stmt);
4b5caab7 6556 }
ebfd146a 6557 else
4b5caab7
IR
6558 /* VEC_NUM is the number of vect stmts to be created for this
6559 group. */
ebfd146a 6560 vec_num = group_size;
44fc7854
BE
6561
6562 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6563 }
b8698a0f 6564 else
7cfb4d93 6565 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6566
73fbfcad 6567 if (dump_enabled_p ())
78c60e3d 6568 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6569 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6570
2de001ee
RS
6571 if (memory_access_type == VMAT_ELEMENTWISE
6572 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6573 {
6574 gimple_stmt_iterator incr_gsi;
6575 bool insert_after;
355fe088 6576 gimple *incr;
f2e2a985
MM
6577 tree offvar;
6578 tree ivstep;
6579 tree running_off;
f2e2a985
MM
6580 tree stride_base, stride_step, alias_off;
6581 tree vec_oprnd;
f502d50e 6582 unsigned int g;
4d694b27
RS
6583 /* Checked by get_load_store_type. */
6584 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6585
7cfb4d93 6586 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6587 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6588
6589 stride_base
6590 = fold_build_pointer_plus
b210f45f 6591 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6592 size_binop (PLUS_EXPR,
b210f45f 6593 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6594 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6595 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6596
6597 /* For a store with loop-invariant (but other than power-of-2)
6598 stride (i.e. not a grouped access) like so:
6599
6600 for (i = 0; i < n; i += stride)
6601 array[i] = ...;
6602
6603 we generate a new induction variable and new stores from
6604 the components of the (vectorized) rhs:
6605
6606 for (j = 0; ; j += VF*stride)
6607 vectemp = ...;
6608 tmp1 = vectemp[0];
6609 array[j] = tmp1;
6610 tmp2 = vectemp[1];
6611 array[j + stride] = tmp2;
6612 ...
6613 */
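      /* For instance (a sketch, assuming a 4-lane vector and no SLP): each
	 copy extracts lanes 0..3 of VECTEMP and stores them at RUNNING_OFF,
	 RUNNING_OFF + S, RUNNING_OFF + 2*S and RUNNING_OFF + 3*S, where S is
	 the scalar DR_STEP; the induction variable itself advances by
	 VF * S per vector iteration.  */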
6614
4d694b27 6615 unsigned nstores = const_nunits;
b17dc4d4 6616 unsigned lnel = 1;
cee62fee 6617 tree ltype = elem_type;
04199738 6618 tree lvectype = vectype;
cee62fee
MM
6619 if (slp)
6620 {
4d694b27
RS
6621 if (group_size < const_nunits
6622 && const_nunits % group_size == 0)
b17dc4d4 6623 {
4d694b27 6624 nstores = const_nunits / group_size;
b17dc4d4
RB
6625 lnel = group_size;
6626 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6627 lvectype = vectype;
6628
6629 /* First check if vec_extract optab doesn't support extraction
6630 of vector elts directly. */
b397965c 6631 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6632 machine_mode vmode;
6633 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6634 || !VECTOR_MODE_P (vmode)
414fef4e 6635 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6636 || (convert_optab_handler (vec_extract_optab,
6637 TYPE_MODE (vectype), vmode)
6638 == CODE_FOR_nothing))
6639 {
6640 /* Try to avoid emitting an extract of vector elements
6641 by performing the extracts using an integer type of the
6642 same size, extracting from a vector of those and then
6643 re-interpreting it as the original vector type if
6644 supported. */
6645 unsigned lsize
6646 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6647 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6648 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6649 /* If we can't construct such a vector fall back to
6650 element extracts from the original vector type and
6651 element size stores. */
4d694b27 6652 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6653 && VECTOR_MODE_P (vmode)
414fef4e 6654 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6655 && (convert_optab_handler (vec_extract_optab,
6656 vmode, elmode)
6657 != CODE_FOR_nothing))
6658 {
4d694b27 6659 nstores = lnunits;
04199738
RB
6660 lnel = group_size;
6661 ltype = build_nonstandard_integer_type (lsize, 1);
6662 lvectype = build_vector_type (ltype, nstores);
6663 }
6664 /* Else fall back to vector extraction anyway.
6665 Fewer stores are more important than avoiding spilling
6666 of the vector we extract from. Compared to the
6667 construction case in vectorizable_load no store-forwarding
6668 issue exists here for reasonable archs. */
6669 }
b17dc4d4 6670 }
4d694b27
RS
6671 else if (group_size >= const_nunits
6672 && group_size % const_nunits == 0)
b17dc4d4
RB
6673 {
6674 nstores = 1;
4d694b27 6675 lnel = const_nunits;
b17dc4d4 6676 ltype = vectype;
04199738 6677 lvectype = vectype;
b17dc4d4 6678 }
cee62fee
MM
6679 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6680 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6681 }
6682
f2e2a985
MM
6683 ivstep = stride_step;
6684 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6685 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6686
6687 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6688
b210f45f
RB
6689 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6690 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6691 create_iv (stride_base, ivstep, NULL,
6692 loop, &incr_gsi, insert_after,
6693 &offvar, NULL);
6694 incr = gsi_stmt (incr_gsi);
310213d4 6695 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985 6696
b210f45f 6697 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6698
6699 prev_stmt_info = NULL;
44fc7854 6700 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6701 next_stmt = first_stmt;
6702 for (g = 0; g < group_size; g++)
f2e2a985 6703 {
f502d50e
MM
6704 running_off = offvar;
6705 if (g)
f2e2a985 6706 {
f502d50e
MM
6707 tree size = TYPE_SIZE_UNIT (ltype);
6708 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6709 size);
f502d50e 6710 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6711 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6712 running_off, pos);
f2e2a985 6713 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6714 running_off = newoff;
f502d50e 6715 }
b17dc4d4
RB
6716 unsigned int group_el = 0;
6717 unsigned HOST_WIDE_INT
6718 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6719 for (j = 0; j < ncopies; j++)
6720 {
c3a8f964 6721 /* We've set op and dt above, from vect_get_store_rhs,
f502d50e
MM
6722 and first_stmt == stmt. */
6723 if (j == 0)
6724 {
6725 if (slp)
6726 {
6727 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6728 slp_node);
f502d50e
MM
6729 vec_oprnd = vec_oprnds[0];
6730 }
6731 else
6732 {
c3a8f964 6733 op = vect_get_store_rhs (next_stmt);
81c40241 6734 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6735 }
6736 }
f2e2a985 6737 else
f502d50e
MM
6738 {
6739 if (slp)
6740 vec_oprnd = vec_oprnds[j];
6741 else
c079cbac 6742 {
894dd753 6743 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411
RS
6744 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6745 vec_oprnd);
c079cbac 6746 }
f502d50e 6747 }
04199738
RB
6748 /* Pun the vector to extract from if necessary. */
6749 if (lvectype != vectype)
6750 {
6751 tree tem = make_ssa_name (lvectype);
6752 gimple *pun
6753 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6754 lvectype, vec_oprnd));
6755 vect_finish_stmt_generation (stmt, pun, gsi);
6756 vec_oprnd = tem;
6757 }
f502d50e
MM
6758 for (i = 0; i < nstores; i++)
6759 {
6760 tree newref, newoff;
355fe088 6761 gimple *incr, *assign;
f502d50e
MM
6762 tree size = TYPE_SIZE (ltype);
6763 /* Extract the i'th component. */
6764 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6765 bitsize_int (i), size);
6766 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6767 size, pos);
6768
6769 elem = force_gimple_operand_gsi (gsi, elem, true,
6770 NULL_TREE, true,
6771 GSI_SAME_STMT);
6772
b17dc4d4
RB
6773 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6774 group_el * elsz);
f502d50e 6775 newref = build2 (MEM_REF, ltype,
b17dc4d4 6776 running_off, this_off);
19986382 6777 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6778
6779 /* And store it to *running_off. */
6780 assign = gimple_build_assign (newref, elem);
6781 vect_finish_stmt_generation (stmt, assign, gsi);
6782
b17dc4d4
RB
6783 group_el += lnel;
6784 if (! slp
6785 || group_el == group_size)
6786 {
6787 newoff = copy_ssa_name (running_off, NULL);
6788 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6789 running_off, stride_step);
6790 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6791
b17dc4d4
RB
6792 running_off = newoff;
6793 group_el = 0;
6794 }
225ce44b
RB
6795 if (g == group_size - 1
6796 && !slp)
f502d50e
MM
6797 {
6798 if (j == 0 && i == 0)
225ce44b
RB
6799 STMT_VINFO_VEC_STMT (stmt_info)
6800 = *vec_stmt = assign;
f502d50e
MM
6801 else
6802 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6803 prev_stmt_info = vinfo_for_stmt (assign);
6804 }
6805 }
f2e2a985 6806 }
2c53b149 6807 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6808 if (slp)
6809 break;
f2e2a985 6810 }
778dd3b6
RB
6811
6812 vec_oprnds.release ();
f2e2a985
MM
6813 return true;
6814 }
6815
8c681247 6816 auto_vec<tree> dr_chain (group_size);
9771b263 6817 oprnds.create (group_size);
ebfd146a 6818
720f5239 6819 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6820 gcc_assert (alignment_support_scheme);
70088b95
RS
6821 vec_loop_masks *loop_masks
6822 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6823 ? &LOOP_VINFO_MASKS (loop_vinfo)
6824 : NULL);
272c6793 6825 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6826 realignment. vect_supportable_dr_alignment always returns either
6827 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6828 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6829 && !mask
70088b95 6830 && !loop_masks)
272c6793
RS
6831 || alignment_support_scheme == dr_aligned
6832 || alignment_support_scheme == dr_unaligned_supported);
6833
62da9e14
RS
6834 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6835 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6836 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6837
f307441a
RS
6838 tree bump;
6839 tree vec_offset = NULL_TREE;
6840 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6841 {
6842 aggr_type = NULL_TREE;
6843 bump = NULL_TREE;
6844 }
6845 else if (memory_access_type == VMAT_GATHER_SCATTER)
6846 {
6847 aggr_type = elem_type;
6848 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6849 &bump, &vec_offset);
6850 }
272c6793 6851 else
f307441a
RS
6852 {
6853 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6854 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6855 else
6856 aggr_type = vectype;
6857 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6858 }
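  /* A sketch of the increment chosen above: the size of AGGR_TYPE, i.e. a
     whole array of VEC_NUM vectors for store-lanes and a single vector
     otherwise (negated for accesses with a negative step).  */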
ebfd146a 6859
c3a8f964
RS
6860 if (mask)
6861 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6862
ebfd146a
IR
6863 /* In case the vectorization factor (VF) is bigger than the number
6864 of elements that we can fit in a vectype (nunits), we have to generate
6865	 more than one vector stmt, i.e. we need to "unroll" the
b8698a0f 6866 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6867 vect_get_vec_def_for_copy_stmt. */
6868
0d0293ac 6869 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6870
6871 S1: &base + 2 = x2
6872 S2: &base = x0
6873 S3: &base + 1 = x1
6874 S4: &base + 3 = x3
6875
6876 We create vectorized stores starting from base address (the access of the
6877	 first stmt in the chain (S2 in the above example)), when the last store stmt
6878 of the chain (S4) is reached:
6879
6880 VS1: &base = vx2
6881 VS2: &base + vec_size*1 = vx0
6882 VS3: &base + vec_size*2 = vx1
6883 VS4: &base + vec_size*3 = vx3
6884
6885 Then permutation statements are generated:
6886
3fcc1b55
JJ
6887 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6888 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6889 ...
b8698a0f 6890
ebfd146a
IR
6891 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6892 (the order of the data-refs in the output of vect_permute_store_chain
6893 corresponds to the order of scalar stmts in the interleaving chain - see
6894 the documentation of vect_permute_store_chain()).
6895
6896 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6897 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6898 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6899 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6900 */
6901
6902 prev_stmt_info = NULL;
c3a8f964 6903 tree vec_mask = NULL_TREE;
ebfd146a
IR
6904 for (j = 0; j < ncopies; j++)
6905 {
ebfd146a
IR
6906
6907 if (j == 0)
6908 {
6909 if (slp)
6910 {
6911 /* Get vectorized arguments for SLP_NODE. */
d092494c 6912 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6913 NULL, slp_node);
ebfd146a 6914
9771b263 6915 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6916 }
6917 else
6918 {
b8698a0f
L
6919 /* For interleaved stores we collect vectorized defs for all the
6920 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6921 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6922 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6923
2c53b149 6924 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6925 OPRNDS are of size 1. */
b8698a0f 6926 next_stmt = first_stmt;
ebfd146a
IR
6927 for (i = 0; i < group_size; i++)
6928 {
b8698a0f 6929 /* Since gaps are not supported for interleaved stores,
2c53b149 6930 DR_GROUP_SIZE is the exact number of stmts in the chain.
b8698a0f 6931 Therefore, NEXT_STMT can't be NULL_TREE. In case that
2c53b149 6932 there is no interleaving, DR_GROUP_SIZE is 1, and only one
ebfd146a 6933 iteration of the loop will be executed. */
c3a8f964 6934 op = vect_get_store_rhs (next_stmt);
81c40241 6935 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6936 dr_chain.quick_push (vec_oprnd);
6937 oprnds.quick_push (vec_oprnd);
2c53b149 6938 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6939 }
c3a8f964
RS
6940 if (mask)
6941 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6942 mask_vectype);
ebfd146a
IR
6943 }
6944
6945	 /* We should have caught mismatched types earlier.  */
6946 gcc_assert (useless_type_conversion_p (vectype,
6947 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6948 bool simd_lane_access_p
6949 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6950 if (simd_lane_access_p
6951 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6952 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6953 && integer_zerop (DR_OFFSET (first_dr))
6954 && integer_zerop (DR_INIT (first_dr))
6955 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6956 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6957 {
6958 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6959 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6960 inv_p = false;
74bf76ed 6961 }
f307441a
RS
6962 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6963 {
6964 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6965 &dataref_ptr, &vec_offset);
6966 inv_p = false;
6967 }
74bf76ed
JJ
6968 else
6969 dataref_ptr
6970 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6971 simd_lane_access_p ? loop : NULL,
09dfa495 6972 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
6973 simd_lane_access_p, &inv_p,
6974 NULL_TREE, bump);
a70d6342 6975 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6976 }
b8698a0f 6977 else
ebfd146a 6978 {
b8698a0f
L
6979 /* For interleaved stores we created vectorized defs for all the
6980 defs stored in OPRNDS in the previous iteration (previous copy).
6981 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6982 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6983 next copy.
2c53b149 6984 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6985 OPRNDS are of size 1. */
6986 for (i = 0; i < group_size; i++)
6987 {
9771b263 6988 op = oprnds[i];
894dd753 6989 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411 6990 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
6991 dr_chain[i] = vec_oprnd;
6992 oprnds[i] = vec_oprnd;
ebfd146a 6993 }
c3a8f964 6994 if (mask)
929b4411 6995 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
6996 if (dataref_offset)
6997 dataref_offset
f307441a
RS
6998 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6999 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
7000 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7001 vec_offset);
74bf76ed
JJ
7002 else
7003 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 7004 bump);
ebfd146a
IR
7005 }
7006
2de001ee 7007 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7008 {
272c6793 7009 tree vec_array;
267d3070 7010
3ba4ff41 7011 /* Get an array into which we can store the individual vectors. */
272c6793 7012 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7013
7014 /* Invalidate the current contents of VEC_ARRAY. This should
7015 become an RTL clobber too, which prevents the vector registers
7016 from being upward-exposed. */
7017 vect_clobber_variable (stmt, gsi, vec_array);
7018
7019 /* Store the individual vectors into the array. */
272c6793 7020 for (i = 0; i < vec_num; i++)
c2d7ab2a 7021 {
9771b263 7022 vec_oprnd = dr_chain[i];
272c6793 7023 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 7024 }
b8698a0f 7025
7cfb4d93 7026 tree final_mask = NULL;
70088b95
RS
7027 if (loop_masks)
7028 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7029 vectype, j);
7cfb4d93
RS
7030 if (vec_mask)
7031 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7032 vec_mask, gsi);
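	  /* A sketch: when both a loop mask and the store's own mask are
	     present, FINAL_MASK is effectively their bitwise AND, so only
	     lanes active in both actually write memory.  */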
7033
7e11fc7f 7034 gcall *call;
7cfb4d93 7035 if (final_mask)
7e11fc7f
RS
7036 {
7037 /* Emit:
7038 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7039 VEC_ARRAY). */
7040 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7041 tree alias_ptr = build_int_cst (ref_type, align);
7042 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7043 dataref_ptr, alias_ptr,
7cfb4d93 7044 final_mask, vec_array);
7e11fc7f
RS
7045 }
7046 else
7047 {
7048 /* Emit:
7049 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7050 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7051 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7052 vec_array);
7053 gimple_call_set_lhs (call, data_ref);
7054 }
a844293d
RS
7055 gimple_call_set_nothrow (call, true);
7056 new_stmt = call;
267d3070 7057 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41
RS
7058
7059 /* Record that VEC_ARRAY is now dead. */
7060 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
7061 }
7062 else
7063 {
7064 new_stmt = NULL;
0d0293ac 7065 if (grouped_store)
272c6793 7066 {
b6b9227d
JJ
7067 if (j == 0)
7068 result_chain.create (group_size);
272c6793
RS
7069 /* Permute. */
7070 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7071 &result_chain);
7072 }
c2d7ab2a 7073
272c6793
RS
7074 next_stmt = first_stmt;
7075 for (i = 0; i < vec_num; i++)
7076 {
644ffefd 7077 unsigned align, misalign;
272c6793 7078
7cfb4d93 7079 tree final_mask = NULL_TREE;
70088b95
RS
7080 if (loop_masks)
7081 final_mask = vect_get_loop_mask (gsi, loop_masks,
7082 vec_num * ncopies,
7cfb4d93
RS
7083 vectype, vec_num * j + i);
7084 if (vec_mask)
7085 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7086 vec_mask, gsi);
7087
f307441a
RS
7088 if (memory_access_type == VMAT_GATHER_SCATTER)
7089 {
7090 tree scale = size_int (gs_info.scale);
7091 gcall *call;
70088b95 7092 if (loop_masks)
f307441a
RS
7093 call = gimple_build_call_internal
7094 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7095 scale, vec_oprnd, final_mask);
7096 else
7097 call = gimple_build_call_internal
7098 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7099 scale, vec_oprnd);
7100 gimple_call_set_nothrow (call, true);
7101 new_stmt = call;
7102 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7103 break;
7104 }
7105
272c6793
RS
7106 if (i > 0)
7107 /* Bump the vector pointer. */
7108 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7109 stmt, bump);
272c6793
RS
7110
7111 if (slp)
9771b263 7112 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7113 else if (grouped_store)
7114 /* For grouped stores vectorized defs are interleaved in
272c6793 7115 vect_permute_store_chain(). */
9771b263 7116 vec_oprnd = result_chain[i];
272c6793 7117
f702e7d4 7118 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7119 if (aligned_access_p (first_dr))
644ffefd 7120 misalign = 0;
272c6793
RS
7121 else if (DR_MISALIGNMENT (first_dr) == -1)
7122 {
25f68d90 7123 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7124 misalign = 0;
272c6793
RS
7125 }
7126 else
c3a8f964 7127 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7128 if (dataref_offset == NULL_TREE
7129 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7130 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7131 misalign);
c2d7ab2a 7132
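	  /* A sketch: for a contiguous-reverse access the lanes are permuted
	     with the reverse mask (e.g. {3, 2, 1, 0} for a 4-lane vector)
	     before being stored, matching the negative step of the scalar
	     accesses.  */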
62da9e14 7133 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7134 {
7135 tree perm_mask = perm_mask_for_reverse (vectype);
7136 tree perm_dest
c3a8f964 7137 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7138 vectype);
b731b390 7139 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7140
7141 /* Generate the permute statement. */
355fe088 7142 gimple *perm_stmt
0d0e4a03
JJ
7143 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7144 vec_oprnd, perm_mask);
09dfa495
BM
7145 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7146
7147 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7148 vec_oprnd = new_temp;
7149 }
7150
272c6793 7151 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7152 if (final_mask)
c3a8f964
RS
7153 {
7154 align = least_bit_hwi (misalign | align);
7155 tree ptr = build_int_cst (ref_type, align);
7156 gcall *call
7157 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7158 dataref_ptr, ptr,
7cfb4d93 7159 final_mask, vec_oprnd);
c3a8f964
RS
7160 gimple_call_set_nothrow (call, true);
7161 new_stmt = call;
7162 }
7163 else
7164 {
7165 data_ref = fold_build2 (MEM_REF, vectype,
7166 dataref_ptr,
7167 dataref_offset
7168 ? dataref_offset
7169 : build_int_cst (ref_type, 0));
7170 if (aligned_access_p (first_dr))
7171 ;
7172 else if (DR_MISALIGNMENT (first_dr) == -1)
7173 TREE_TYPE (data_ref)
7174 = build_aligned_type (TREE_TYPE (data_ref),
7175 align * BITS_PER_UNIT);
7176 else
7177 TREE_TYPE (data_ref)
7178 = build_aligned_type (TREE_TYPE (data_ref),
7179 TYPE_ALIGN (elem_type));
19986382 7180 vect_copy_ref_info (data_ref, DR_REF (first_dr));
c3a8f964
RS
7181 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7182 }
272c6793 7183 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
7184
7185 if (slp)
7186 continue;
7187
2c53b149 7188 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
7189 if (!next_stmt)
7190 break;
7191 }
ebfd146a 7192 }
1da0876c
RS
7193 if (!slp)
7194 {
7195 if (j == 0)
7196 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7197 else
7198 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7199 prev_stmt_info = vinfo_for_stmt (new_stmt);
7200 }
ebfd146a
IR
7201 }
7202
9771b263
DN
7203 oprnds.release ();
7204 result_chain.release ();
7205 vec_oprnds.release ();
ebfd146a
IR
7206
7207 return true;
7208}
7209
557be5a8
AL
7210/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7211 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7212 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7213 vect_gen_perm_mask_checked. */
a1e53f3f 7214
3fcc1b55 7215tree
4aae3cb3 7216vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7217{
b00cb3bf 7218 tree mask_type;
a1e53f3f 7219
0ecc2b7d
RS
7220 poly_uint64 nunits = sel.length ();
7221 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7222
7223 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7224 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7225}
7226
7ac7e286 7227/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7228 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7229
7230tree
4aae3cb3 7231vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7232{
7ac7e286 7233 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7234 return vect_gen_perm_mask_any (vectype, sel);
7235}
7236
aec7ae7d
JJ
7237/* Given vector variables X and Y that were generated for the scalar
7238 STMT, generate instructions to permute the vector elements of X and Y
7239 using permutation mask MASK_VEC, insert them at *GSI and return the
7240 permuted vector variable. */
a1e53f3f
L
7241
7242static tree
355fe088 7243permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7244 gimple_stmt_iterator *gsi)
a1e53f3f
L
7245{
7246 tree vectype = TREE_TYPE (x);
aec7ae7d 7247 tree perm_dest, data_ref;
355fe088 7248 gimple *perm_stmt;
a1e53f3f 7249
7ad429a4
RS
7250 tree scalar_dest = gimple_get_lhs (stmt);
7251 if (TREE_CODE (scalar_dest) == SSA_NAME)
7252 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7253 else
7254 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7255 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7256
7257 /* Generate the permute statement. */
0d0e4a03 7258 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7259 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7260
7261 return data_ref;
7262}
7263
6b916b36
RB
7264/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7265 inserting them on the loop's preheader edge. Returns true if we
7266 were successful in doing so (and thus STMT can then be moved),
7267 otherwise returns false. */
7268
7269static bool
355fe088 7270hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7271{
7272 ssa_op_iter i;
7273 tree op;
7274 bool any = false;
7275
7276 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7277 {
355fe088 7278 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7279 if (!gimple_nop_p (def_stmt)
7280 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7281 {
7282 /* Make sure we don't need to recurse. While we could do
7283	 so in simple cases, when there are more complex use webs
7284 we don't have an easy way to preserve stmt order to fulfil
7285 dependencies within them. */
7286 tree op2;
7287 ssa_op_iter i2;
d1417442
JJ
7288 if (gimple_code (def_stmt) == GIMPLE_PHI)
7289 return false;
6b916b36
RB
7290 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7291 {
355fe088 7292 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7293 if (!gimple_nop_p (def_stmt2)
7294 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7295 return false;
7296 }
7297 any = true;
7298 }
7299 }
7300
7301 if (!any)
7302 return true;
7303
7304 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7305 {
355fe088 7306 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7307 if (!gimple_nop_p (def_stmt)
7308 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7309 {
7310 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7311 gsi_remove (&gsi, false);
7312 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7313 }
7314 }
7315
7316 return true;
7317}
7318
ebfd146a
IR
7319/* vectorizable_load.
7320
b8698a0f
L
7321 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7322 can be vectorized.
7323 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
7324 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7325 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7326
7327static bool
355fe088 7328vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
68435eb2
RB
7329 slp_tree slp_node, slp_instance slp_node_instance,
7330 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7331{
7332 tree scalar_dest;
7333 tree vec_dest = NULL;
7334 tree data_ref = NULL;
7335 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7336 stmt_vec_info prev_stmt_info;
ebfd146a 7337 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7338 struct loop *loop = NULL;
ebfd146a 7339 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7340 bool nested_in_vect_loop = false;
c716e67f 7341 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7342 tree elem_type;
ebfd146a 7343 tree new_temp;
ef4bddc2 7344 machine_mode mode;
355fe088 7345 gimple *new_stmt = NULL;
ebfd146a
IR
7346 tree dummy;
7347 enum dr_alignment_support alignment_support_scheme;
7348 tree dataref_ptr = NULL_TREE;
74bf76ed 7349 tree dataref_offset = NULL_TREE;
355fe088 7350 gimple *ptr_incr = NULL;
ebfd146a 7351 int ncopies;
4d694b27
RS
7352 int i, j;
7353 unsigned int group_size;
7354 poly_uint64 group_gap_adj;
ebfd146a
IR
7355 tree msq = NULL_TREE, lsq;
7356 tree offset = NULL_TREE;
356bbc4c 7357 tree byte_offset = NULL_TREE;
ebfd146a 7358 tree realignment_token = NULL_TREE;
538dd0b7 7359 gphi *phi = NULL;
6e1aa848 7360 vec<tree> dr_chain = vNULL;
0d0293ac 7361 bool grouped_load = false;
355fe088 7362 gimple *first_stmt;
4f0a0218 7363 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
7364 bool inv_p;
7365 bool compute_in_loop = false;
7366 struct loop *at_loop;
7367 int vec_num;
7368 bool slp = (slp_node != NULL);
7369 bool slp_perm = false;
a70d6342 7370 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7371 poly_uint64 vf;
272c6793 7372 tree aggr_type;
134c85ca 7373 gather_scatter_info gs_info;
310213d4 7374 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7375 tree ref_type;
929b4411 7376 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7377
465c8c19
JJ
7378 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7379 return false;
7380
66c16fd9
RB
7381 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7382 && ! vec_stmt)
465c8c19
JJ
7383 return false;
7384
c3a8f964
RS
7385 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7386 if (is_gimple_assign (stmt))
7387 {
7388 scalar_dest = gimple_assign_lhs (stmt);
7389 if (TREE_CODE (scalar_dest) != SSA_NAME)
7390 return false;
465c8c19 7391
c3a8f964
RS
7392 tree_code code = gimple_assign_rhs_code (stmt);
7393 if (code != ARRAY_REF
7394 && code != BIT_FIELD_REF
7395 && code != INDIRECT_REF
7396 && code != COMPONENT_REF
7397 && code != IMAGPART_EXPR
7398 && code != REALPART_EXPR
7399 && code != MEM_REF
7400 && TREE_CODE_CLASS (code) != tcc_declaration)
7401 return false;
7402 }
7403 else
7404 {
7405 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7406 if (!call || !gimple_call_internal_p (call))
7407 return false;
7408
7409 internal_fn ifn = gimple_call_internal_fn (call);
7410 if (!internal_load_fn_p (ifn))
c3a8f964 7411 return false;
465c8c19 7412
c3a8f964
RS
7413 scalar_dest = gimple_call_lhs (call);
7414 if (!scalar_dest)
7415 return false;
7416
7417 if (slp_node != NULL)
7418 {
7419 if (dump_enabled_p ())
7420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7421 "SLP of masked loads not supported.\n");
7422 return false;
7423 }
7424
bfaa08b7
RS
7425 int mask_index = internal_fn_mask_index (ifn);
7426 if (mask_index >= 0)
7427 {
7428 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7429 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7430 &mask_vectype))
bfaa08b7
RS
7431 return false;
7432 }
c3a8f964 7433 }
465c8c19
JJ
7434
7435 if (!STMT_VINFO_DATA_REF (stmt_info))
7436 return false;
7437
7438 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7439 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7440
a70d6342
IR
7441 if (loop_vinfo)
7442 {
7443 loop = LOOP_VINFO_LOOP (loop_vinfo);
7444 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7445 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7446 }
7447 else
3533e503 7448 vf = 1;
ebfd146a
IR
7449
7450 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7451 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7452 case of SLP. */
fce57248 7453 if (slp)
ebfd146a
IR
7454 ncopies = 1;
7455 else
e8f142e2 7456 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7457
7458 gcc_assert (ncopies >= 1);
7459
7460 /* FORNOW. This restriction should be relaxed. */
7461 if (nested_in_vect_loop && ncopies > 1)
7462 {
73fbfcad 7463 if (dump_enabled_p ())
78c60e3d 7464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7465 "multiple types in nested loop.\n");
ebfd146a
IR
7466 return false;
7467 }
7468
f2556b68
RB
7469 /* Invalidate assumptions made by dependence analysis when vectorization
7470 on the unrolled body effectively re-orders stmts. */
7471 if (ncopies > 1
7472 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7473 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7474 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7475 {
7476 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7478 "cannot perform implicit CSE when unrolling "
7479 "with negative dependence distance\n");
7480 return false;
7481 }
7482
7b7b1813 7483 elem_type = TREE_TYPE (vectype);
947131ba 7484 mode = TYPE_MODE (vectype);
ebfd146a
IR
7485
7486 /* FORNOW. In some cases can vectorize even if data-type not supported
7487 (e.g. - data copies). */
947131ba 7488 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7489 {
73fbfcad 7490 if (dump_enabled_p ())
78c60e3d 7491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7492 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7493 return false;
7494 }
7495
ebfd146a 7496 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7497 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7498 {
0d0293ac 7499 grouped_load = true;
ebfd146a 7500 /* FORNOW */
2de001ee
RS
7501 gcc_assert (!nested_in_vect_loop);
7502 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7503
2c53b149
RB
7504 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7505 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7506
b1af7da6
RB
7507 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7508 slp_perm = true;
7509
f2556b68
RB
7510 /* Invalidate assumptions made by dependence analysis when vectorization
7511 on the unrolled body effectively re-orders stmts. */
7512 if (!PURE_SLP_STMT (stmt_info)
7513 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7514 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7515 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7516 {
7517 if (dump_enabled_p ())
7518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7519 "cannot perform implicit CSE when performing "
7520 "group loads with negative dependence distance\n");
7521 return false;
7522 }
96bb56b2
RB
7523
7524 /* Similarly, when the stmt is a load that is both part of an SLP
7525	 instance and a loop vectorized stmt via the same-dr mechanism,
7526	 we have to give up.  */
2c53b149 7527 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2
RB
7528 && (STMT_SLP_TYPE (stmt_info)
7529 != STMT_SLP_TYPE (vinfo_for_stmt
2c53b149 7530 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
96bb56b2
RB
7531 {
7532 if (dump_enabled_p ())
7533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7534 "conflicting SLP types for CSEd load\n");
7535 return false;
7536 }
ebfd146a 7537 }
7cfb4d93
RS
7538 else
7539 group_size = 1;
ebfd146a 7540
2de001ee 7541 vect_memory_access_type memory_access_type;
7e11fc7f 7542 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7543 &memory_access_type, &gs_info))
7544 return false;
a1e53f3f 7545
c3a8f964
RS
7546 if (mask)
7547 {
7548 if (memory_access_type == VMAT_CONTIGUOUS)
7549 {
7e11fc7f
RS
7550 machine_mode vec_mode = TYPE_MODE (vectype);
7551 if (!VECTOR_MODE_P (vec_mode)
7552 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7553 TYPE_MODE (mask_vectype), true))
7554 return false;
7555 }
bfaa08b7 7556 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7557 {
7558 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7559 tree masktype
7560 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7561 if (TREE_CODE (masktype) == INTEGER_TYPE)
7562 {
7563 if (dump_enabled_p ())
7564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7565 "masked gather with integer mask not"
7566 " supported.");
7567 return false;
7568 }
7569 }
bfaa08b7
RS
7570 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7571 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7572 {
7573 if (dump_enabled_p ())
7574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7575 "unsupported access type for masked load.\n");
7576 return false;
7577 }
7578 }
7579
ebfd146a
IR
7580 if (!vec_stmt) /* transformation not required. */
7581 {
2de001ee
RS
7582 if (!slp)
7583 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7584
7585 if (loop_vinfo
7586 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7587 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7588 memory_access_type, &gs_info);
7cfb4d93 7589
ebfd146a 7590 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7591 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7592 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7593 return true;
7594 }
7595
2de001ee
RS
7596 if (!slp)
7597 gcc_assert (memory_access_type
7598 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7599
73fbfcad 7600 if (dump_enabled_p ())
78c60e3d 7601 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7602 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7603
67b8dbac 7604 /* Transform. */
ebfd146a 7605
f702e7d4 7606 ensure_base_align (dr);
c716e67f 7607
bfaa08b7 7608 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7609 {
929b4411
RS
7610 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7611 mask_dt);
aec7ae7d
JJ
7612 return true;
7613 }
2de001ee
RS
7614
7615 if (memory_access_type == VMAT_ELEMENTWISE
7616 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7617 {
7618 gimple_stmt_iterator incr_gsi;
7619 bool insert_after;
355fe088 7620 gimple *incr;
7d75abc8 7621 tree offvar;
7d75abc8
MM
7622 tree ivstep;
7623 tree running_off;
9771b263 7624 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7625 tree stride_base, stride_step, alias_off;
4d694b27
RS
7626 /* Checked by get_load_store_type. */
7627 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7628 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7629
7cfb4d93 7630 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7631 gcc_assert (!nested_in_vect_loop);
7d75abc8 7632
b210f45f 7633 if (grouped_load)
44fc7854 7634 {
2c53b149 7635 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7636 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
44fc7854 7637 }
ab313a8c 7638 else
44fc7854
BE
7639 {
7640 first_stmt = stmt;
7641 first_dr = dr;
b210f45f
RB
7642 }
7643 if (slp && grouped_load)
7644 {
2c53b149 7645 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
b210f45f
RB
7646 ref_type = get_group_alias_ptr_type (first_stmt);
7647 }
7648 else
7649 {
7650 if (grouped_load)
7651 cst_offset
7652 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7653 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
44fc7854 7654 group_size = 1;
b210f45f 7655 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7656 }
ab313a8c 7657
14ac6aa2
RB
7658 stride_base
7659 = fold_build_pointer_plus
ab313a8c 7660 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7661 size_binop (PLUS_EXPR,
ab313a8c
RB
7662 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7663 convert_to_ptrofftype (DR_INIT (first_dr))));
7664 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7665
7666 /* For a load with loop-invariant (but other than power-of-2)
7667 stride (i.e. not a grouped access) like so:
7668
7669 for (i = 0; i < n; i += stride)
7670 ... = array[i];
7671
7672 we generate a new induction variable and new accesses to
7673 form a new vector (or vectors, depending on ncopies):
7674
7675 for (j = 0; ; j += VF*stride)
7676 tmp1 = array[j];
7677 tmp2 = array[j + stride];
7678 ...
7679 vectemp = {tmp1, tmp2, ...}
7680 */
7681
ab313a8c
RB
7682 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7683 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7684
7685 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7686
b210f45f
RB
7687 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7688 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7689 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7690 loop, &incr_gsi, insert_after,
7691 &offvar, NULL);
7692 incr = gsi_stmt (incr_gsi);
310213d4 7693 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7694
b210f45f 7695 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7696
7697 prev_stmt_info = NULL;
7698 running_off = offvar;
44fc7854 7699 alias_off = build_int_cst (ref_type, 0);
4d694b27 7700 int nloads = const_nunits;
e09b4c37 7701 int lnel = 1;
7b5fc413 7702 tree ltype = TREE_TYPE (vectype);
ea60dd34 7703 tree lvectype = vectype;
b266b968 7704 auto_vec<tree> dr_chain;
2de001ee 7705 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7706 {
4d694b27 7707 if (group_size < const_nunits)
e09b4c37 7708 {
ff03930a
JJ
7709 /* First check if vec_init optab supports construction from
7710 vector elts directly. */
b397965c 7711 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7712 machine_mode vmode;
7713 if (mode_for_vector (elmode, group_size).exists (&vmode)
7714 && VECTOR_MODE_P (vmode)
414fef4e 7715 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7716 && (convert_optab_handler (vec_init_optab,
7717 TYPE_MODE (vectype), vmode)
7718 != CODE_FOR_nothing))
ea60dd34 7719 {
4d694b27 7720 nloads = const_nunits / group_size;
ea60dd34 7721 lnel = group_size;
ff03930a
JJ
7722 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7723 }
7724 else
7725 {
7726 /* Otherwise avoid emitting a constructor of vector elements
7727 by performing the loads using an integer type of the same
7728 size, constructing a vector of those and then
7729 re-interpreting it as the original vector type.
7730 This avoids a huge runtime penalty due to the general
7731 inability to perform store forwarding from smaller stores
7732 to a larger load. */
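		  /* For instance (a sketch): a group of two SImode elements
		     can be loaded as a single DImode element; two such loads
		     build a V2DImode vector that is then view-converted back
		     to the V4SImode vectype.  */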
7733 unsigned lsize
7734 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7735 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7736 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7737 /* If we can't construct such a vector fall back to
7738 element loads of the original vector type. */
4d694b27 7739 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7740 && VECTOR_MODE_P (vmode)
414fef4e 7741 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7742 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7743 != CODE_FOR_nothing))
7744 {
4d694b27 7745 nloads = lnunits;
ff03930a
JJ
7746 lnel = group_size;
7747 ltype = build_nonstandard_integer_type (lsize, 1);
7748 lvectype = build_vector_type (ltype, nloads);
7749 }
ea60dd34 7750 }
e09b4c37 7751 }
2de001ee 7752 else
e09b4c37 7753 {
ea60dd34 7754 nloads = 1;
4d694b27 7755 lnel = const_nunits;
e09b4c37 7756 ltype = vectype;
e09b4c37 7757 }
2de001ee
RS
7758 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7759 }
bb4e4747
BC
7760 /* Load the whole vector at once if the vectype is a single-element (vector(1) scalar_type) vectype.  */
7761 else if (nloads == 1)
7762 ltype = vectype;
7763
2de001ee
RS
7764 if (slp)
7765 {
66c16fd9
RB
7766 /* For SLP permutation support we need to load the whole group,
7767 not only the number of vector stmts the permutation result
7768 fits in. */
b266b968 7769 if (slp_perm)
66c16fd9 7770 {
d9f21f6a
RS
7771 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7772 variable VF. */
7773 unsigned int const_vf = vf.to_constant ();
4d694b27 7774 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7775 dr_chain.create (ncopies);
7776 }
7777 else
7778 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7779 }
4d694b27 7780 unsigned int group_el = 0;
e09b4c37
RB
7781 unsigned HOST_WIDE_INT
7782 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7783 for (j = 0; j < ncopies; j++)
7784 {
7b5fc413 7785 if (nloads > 1)
e09b4c37
RB
7786 vec_alloc (v, nloads);
7787 for (i = 0; i < nloads; i++)
7b5fc413 7788 {
e09b4c37 7789 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7790 group_el * elsz + cst_offset);
19986382
RB
7791 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7792 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7793 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
e09b4c37
RB
7794 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7795 if (nloads > 1)
7796 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7797 gimple_assign_lhs (new_stmt));
7798
7799 group_el += lnel;
7800 if (! slp
7801 || group_el == group_size)
7b5fc413 7802 {
e09b4c37
RB
7803 tree newoff = copy_ssa_name (running_off);
7804 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7805 running_off, stride_step);
7b5fc413
RB
7806 vect_finish_stmt_generation (stmt, incr, gsi);
7807
7808 running_off = newoff;
e09b4c37 7809 group_el = 0;
7b5fc413 7810 }
7b5fc413 7811 }
e09b4c37 7812 if (nloads > 1)
7d75abc8 7813 {
ea60dd34
RB
7814 tree vec_inv = build_constructor (lvectype, v);
7815 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7816 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7817 if (lvectype != vectype)
7818 {
7819 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7820 VIEW_CONVERT_EXPR,
7821 build1 (VIEW_CONVERT_EXPR,
7822 vectype, new_temp));
7823 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7824 }
7d75abc8
MM
7825 }
7826
7b5fc413 7827 if (slp)
b266b968 7828 {
b266b968
RB
7829 if (slp_perm)
7830 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7831 else
7832 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7833 }
7d75abc8 7834 else
225ce44b
RB
7835 {
7836 if (j == 0)
7837 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7838 else
7839 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7840 prev_stmt_info = vinfo_for_stmt (new_stmt);
7841 }
7d75abc8 7842 }
b266b968 7843 if (slp_perm)
29afecdf
RB
7844 {
7845 unsigned n_perms;
7846 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7847 slp_node_instance, false, &n_perms);
7848 }
7d75abc8
MM
7849 return true;
7850 }
aec7ae7d 7851
b5ec4de7
RS
7852 if (memory_access_type == VMAT_GATHER_SCATTER
7853 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7854 grouped_load = false;
7855
0d0293ac 7856 if (grouped_load)
ebfd146a 7857 {
2c53b149
RB
7858 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7859 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7860 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7861 without permutation. */
7862 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7863 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7864 /* For BB vectorization always use the first stmt to base
7865 the data ref pointer on. */
7866 if (bb_vinfo)
7867 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7868
ebfd146a 7869 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7870 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7871 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7872 ??? But we can only do so if there is exactly one
7873 as we have no way to get at the rest. Leave the CSE
7874 opportunity alone.
7875 ??? With the group load eventually participating
7876 in multiple different permutations (having multiple
7877 slp nodes which refer to the same group) the CSE
7878 is even wrong code. See PR56270. */
7879 && !slp)
ebfd146a
IR
7880 {
7881 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7882 return true;
7883 }
7884 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7885 group_gap_adj = 0;
ebfd146a
IR
7886
7887 /* VEC_NUM is the number of vect stmts to be created for this group. */
7888 if (slp)
7889 {
0d0293ac 7890 grouped_load = false;
91ff1504
RB
7891 /* For SLP permutation support we need to load the whole group,
7892 not only the number of vector stmts the permutation result
7893 fits in. */
7894 if (slp_perm)
b267968e 7895 {
d9f21f6a
RS
7896 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7897 variable VF. */
7898 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7899 unsigned int const_nunits = nunits.to_constant ();
7900 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7901 group_gap_adj = vf * group_size - nunits * vec_num;
7902 }
91ff1504 7903 else
b267968e
RB
7904 {
7905 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7906 group_gap_adj
7907 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7908 }
a70d6342 7909 }
ebfd146a 7910 else
9b999e8c 7911 vec_num = group_size;
44fc7854
BE
7912
7913 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7914 }
7915 else
7916 {
7917 first_stmt = stmt;
7918 first_dr = dr;
7919 group_size = vec_num = 1;
9b999e8c 7920 group_gap_adj = 0;
44fc7854 7921 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7922 }
7923
720f5239 7924 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7925 gcc_assert (alignment_support_scheme);
70088b95
RS
7926 vec_loop_masks *loop_masks
7927 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7928 ? &LOOP_VINFO_MASKS (loop_vinfo)
7929 : NULL);
7cfb4d93
RS
7930 /* Targets with load-lane instructions must not require explicit
7931 realignment. vect_supportable_dr_alignment always returns either
7932 dr_aligned or dr_unaligned_supported for masked operations. */
7933 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7934 && !mask
70088b95 7935 && !loop_masks)
272c6793
RS
7936 || alignment_support_scheme == dr_aligned
7937 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7938
7939 /* In case the vectorization factor (VF) is bigger than the number
7940 of elements that we can fit in a vectype (nunits), we have to generate
7941 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7942 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7943 from one copy of the vector stmt to the next, in the field
ff802fa1 7944 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7945 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7946 stmts that use the defs of the current stmt. The example below
7947 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7948 need to create 4 vectorized stmts):
ebfd146a
IR
7949
7950 before vectorization:
7951 RELATED_STMT VEC_STMT
7952 S1: x = memref - -
7953 S2: z = x + 1 - -
7954
7955 step 1: vectorize stmt S1:
7956 We first create the vector stmt VS1_0, and, as usual, record a
7957 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7958 Next, we create the vector stmt VS1_1, and record a pointer to
7959 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7960 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7961 stmts and pointers:
7962 RELATED_STMT VEC_STMT
7963 VS1_0: vx0 = memref0 VS1_1 -
7964 VS1_1: vx1 = memref1 VS1_2 -
7965 VS1_2: vx2 = memref2 VS1_3 -
7966 VS1_3: vx3 = memref3 - -
7967 S1: x = load - VS1_0
7968 S2: z = x + 1 - -
7969
b8698a0f
L
7970 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7971 information recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
7972 stmt S2. */
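/* Illustrative sketch (not actual code from this file): a consumer that
   needs the J-th vector copy of a def can conceptually walk the chain
   recorded above,

     gimple *vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (scalar_stmt));
     for (unsigned k = 0; k < j; ++k)
       vs = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (vs));

   where scalar_stmt and the loop are only an illustration; the real
   helpers such as vect_get_vec_def_for_stmt_copy advance the chain one
   copy at a time.  */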
7973
0d0293ac 7974 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7975
7976 S1: x2 = &base + 2
7977 S2: x0 = &base
7978 S3: x1 = &base + 1
7979 S4: x3 = &base + 3
7980
b8698a0f 7981 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7982 starting from the access of the first stmt of the chain:
7983
7984 VS1: vx0 = &base
7985 VS2: vx1 = &base + vec_size*1
7986 VS3: vx2 = &base + vec_size*2
7987 VS4: vx3 = &base + vec_size*3
7988
7989 Then permutation statements are generated:
7990
e2c83630
RH
7991 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7992 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7993 ...
7994
7995 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7996 (the order of the data-refs in the output of vect_permute_load_chain
7997 corresponds to the order of scalar stmts in the interleaving chain - see
7998 the documentation of vect_permute_load_chain()).
7999 The generation of permutation stmts and recording them in
0d0293ac 8000 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8001
b8698a0f 8002 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8003 permutation stmts above are created for every copy. The result vector
8004 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8005 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
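/* Illustrative instance of the masks above (element values assumed for
   the example only): for a group of two interleaved accesses and a
   four-element vectype the loads give
     vx0 = { a0, b0, a1, b1 }   vx1 = { a2, b2, a3, b3 }
   and the extract-even/extract-odd permutations
     VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }> -> { a0, a1, a2, a3 }
     VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }> -> { b0, b1, b2, b3 }
   recover the defs of the two scalar stmts of the chain.  */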
ebfd146a
IR
8006
8007 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8008 on a target that supports unaligned accesses (dr_unaligned_supported)
8009 we generate the following code:
8010 p = initial_addr;
8011 indx = 0;
8012 loop {
8013 p = p + indx * vectype_size;
8014 vec_dest = *(p);
8015 indx = indx + 1;
8016 }
8017
8018 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8019 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8020 then generate the following code, in which the data in each iteration is
8021 obtained by two vector loads, one from the previous iteration, and one
8022 from the current iteration:
8023 p1 = initial_addr;
8024 msq_init = *(floor(p1))
8025 p2 = initial_addr + VS - 1;
8026 realignment_token = call target_builtin;
8027 indx = 0;
8028 loop {
8029 p2 = p2 + indx * vectype_size
8030 lsq = *(floor(p2))
8031 vec_dest = realign_load (msq, lsq, realignment_token)
8032 indx = indx + 1;
8033 msq = lsq;
8034 } */
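/* Illustrative instance of the scheme above (addresses assumed for the
   example only): with 16-byte vectors and initial_addr == A + 4 for a
   16-byte-aligned A, msq is loaded from floor(p1) == A, lsq from
   floor(p2) == floor(A + 19) == A + 16, and realign_load combines the
   two aligned loads into the 16 bytes starting at A + 4.  */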
8035
8036 /* If the misalignment remains the same throughout the execution of the
8037 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8038 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8039 This can only occur when vectorizing memory accesses in the inner-loop
8040 nested within an outer-loop that is being vectorized. */
8041
d1e4b493 8042 if (nested_in_vect_loop
cf098191
RS
8043 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8044 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8045 {
8046 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8047 compute_in_loop = true;
8048 }
8049
8050 if ((alignment_support_scheme == dr_explicit_realign_optimized
8051 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8052 && !compute_in_loop)
ebfd146a
IR
8053 {
8054 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8055 alignment_support_scheme, NULL_TREE,
8056 &at_loop);
8057 if (alignment_support_scheme == dr_explicit_realign_optimized)
8058 {
538dd0b7 8059 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8060 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8061 size_one_node);
ebfd146a
IR
8062 }
8063 }
8064 else
8065 at_loop = loop;
8066
62da9e14 8067 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8068 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8069
ab2fc782
RS
8070 tree bump;
8071 tree vec_offset = NULL_TREE;
8072 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8073 {
8074 aggr_type = NULL_TREE;
8075 bump = NULL_TREE;
8076 }
8077 else if (memory_access_type == VMAT_GATHER_SCATTER)
8078 {
8079 aggr_type = elem_type;
8080 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8081 &bump, &vec_offset);
8082 }
272c6793 8083 else
ab2fc782
RS
8084 {
8085 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8086 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8087 else
8088 aggr_type = vectype;
8089 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8090 }
272c6793 8091
c3a8f964 8092 tree vec_mask = NULL_TREE;
ebfd146a 8093 prev_stmt_info = NULL;
4d694b27 8094 poly_uint64 group_elt = 0;
ebfd146a 8095 for (j = 0; j < ncopies; j++)
b8698a0f 8096 {
272c6793 8097 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8098 if (j == 0)
74bf76ed
JJ
8099 {
8100 bool simd_lane_access_p
8101 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8102 if (simd_lane_access_p
8103 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8104 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8105 && integer_zerop (DR_OFFSET (first_dr))
8106 && integer_zerop (DR_INIT (first_dr))
8107 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8108 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8109 && (alignment_support_scheme == dr_aligned
8110 || alignment_support_scheme == dr_unaligned_supported))
8111 {
8112 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8113 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8114 inv_p = false;
74bf76ed 8115 }
4f0a0218
RB
8116 else if (first_stmt_for_drptr
8117 && first_stmt != first_stmt_for_drptr)
8118 {
8119 dataref_ptr
8120 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8121 at_loop, offset, &dummy, gsi,
8122 &ptr_incr, simd_lane_access_p,
ab2fc782 8123 &inv_p, byte_offset, bump);
4f0a0218
RB
8124 /* Adjust the pointer by the difference to first_stmt. */
8125 data_reference_p ptrdr
8126 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8127 tree diff = fold_convert (sizetype,
8128 size_binop (MINUS_EXPR,
8129 DR_INIT (first_dr),
8130 DR_INIT (ptrdr)));
8131 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8132 stmt, diff);
8133 }
bfaa08b7
RS
8134 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8135 {
8136 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8137 &dataref_ptr, &vec_offset);
8138 inv_p = false;
8139 }
74bf76ed
JJ
8140 else
8141 dataref_ptr
8142 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8143 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8144 simd_lane_access_p, &inv_p,
ab2fc782 8145 byte_offset, bump);
c3a8f964
RS
8146 if (mask)
8147 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8148 mask_vectype);
74bf76ed 8149 }
ebfd146a 8150 else
c3a8f964
RS
8151 {
8152 if (dataref_offset)
8153 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8154 bump);
bfaa08b7 8155 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8156 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8157 vec_offset);
c3a8f964 8158 else
ab2fc782
RS
8159 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8160 stmt, bump);
c3a8f964 8161 if (mask)
929b4411 8162 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8163 }
ebfd146a 8164
0d0293ac 8165 if (grouped_load || slp_perm)
9771b263 8166 dr_chain.create (vec_num);
5ce1ee7f 8167
2de001ee 8168 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8169 {
272c6793
RS
8170 tree vec_array;
8171
8172 vec_array = create_vector_array (vectype, vec_num);
8173
7cfb4d93 8174 tree final_mask = NULL_TREE;
70088b95
RS
8175 if (loop_masks)
8176 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8177 vectype, j);
7cfb4d93
RS
8178 if (vec_mask)
8179 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8180 vec_mask, gsi);
8181
7e11fc7f 8182 gcall *call;
7cfb4d93 8183 if (final_mask)
7e11fc7f
RS
8184 {
8185 /* Emit:
8186 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8187 VEC_MASK). */
8188 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8189 tree alias_ptr = build_int_cst (ref_type, align);
8190 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8191 dataref_ptr, alias_ptr,
7cfb4d93 8192 final_mask);
7e11fc7f
RS
8193 }
8194 else
8195 {
8196 /* Emit:
8197 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8198 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8199 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8200 }
a844293d
RS
8201 gimple_call_set_lhs (call, vec_array);
8202 gimple_call_set_nothrow (call, true);
8203 new_stmt = call;
272c6793 8204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8205
272c6793
RS
8206 /* Extract each vector into an SSA_NAME. */
8207 for (i = 0; i < vec_num; i++)
ebfd146a 8208 {
272c6793
RS
8209 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8210 vec_array, i);
9771b263 8211 dr_chain.quick_push (new_temp);
272c6793
RS
8212 }
8213
8214 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8215 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8216
8217 /* Record that VEC_ARRAY is now dead. */
8218 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
8219 }
8220 else
8221 {
8222 for (i = 0; i < vec_num; i++)
8223 {
7cfb4d93 8224 tree final_mask = NULL_TREE;
70088b95 8225 if (loop_masks
7cfb4d93 8226 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8227 final_mask = vect_get_loop_mask (gsi, loop_masks,
8228 vec_num * ncopies,
7cfb4d93
RS
8229 vectype, vec_num * j + i);
8230 if (vec_mask)
8231 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8232 vec_mask, gsi);
8233
272c6793
RS
8234 if (i > 0)
8235 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8236 stmt, bump);
272c6793
RS
8237
8238 /* 2. Create the vector-load in the loop. */
8239 switch (alignment_support_scheme)
8240 {
8241 case dr_aligned:
8242 case dr_unaligned_supported:
be1ac4ec 8243 {
644ffefd
MJ
8244 unsigned int align, misalign;
8245
bfaa08b7
RS
8246 if (memory_access_type == VMAT_GATHER_SCATTER)
8247 {
8248 tree scale = size_int (gs_info.scale);
8249 gcall *call;
70088b95 8250 if (loop_masks)
bfaa08b7
RS
8251 call = gimple_build_call_internal
8252 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8253 vec_offset, scale, final_mask);
8254 else
8255 call = gimple_build_call_internal
8256 (IFN_GATHER_LOAD, 3, dataref_ptr,
8257 vec_offset, scale);
8258 gimple_call_set_nothrow (call, true);
8259 new_stmt = call;
8260 data_ref = NULL_TREE;
8261 break;
8262 }
8263
f702e7d4 8264 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8265 if (alignment_support_scheme == dr_aligned)
8266 {
8267 gcc_assert (aligned_access_p (first_dr));
644ffefd 8268 misalign = 0;
272c6793
RS
8269 }
8270 else if (DR_MISALIGNMENT (first_dr) == -1)
8271 {
25f68d90 8272 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8273 misalign = 0;
272c6793
RS
8274 }
8275 else
c3a8f964 8276 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8277 if (dataref_offset == NULL_TREE
8278 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8279 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8280 align, misalign);
c3a8f964 8281
7cfb4d93 8282 if (final_mask)
c3a8f964
RS
8283 {
8284 align = least_bit_hwi (misalign | align);
8285 tree ptr = build_int_cst (ref_type, align);
8286 gcall *call
8287 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8288 dataref_ptr, ptr,
7cfb4d93 8289 final_mask);
c3a8f964
RS
8290 gimple_call_set_nothrow (call, true);
8291 new_stmt = call;
8292 data_ref = NULL_TREE;
8293 }
8294 else
8295 {
8296 data_ref
8297 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8298 dataref_offset
8299 ? dataref_offset
8300 : build_int_cst (ref_type, 0));
8301 if (alignment_support_scheme == dr_aligned)
8302 ;
8303 else if (DR_MISALIGNMENT (first_dr) == -1)
8304 TREE_TYPE (data_ref)
8305 = build_aligned_type (TREE_TYPE (data_ref),
8306 align * BITS_PER_UNIT);
8307 else
8308 TREE_TYPE (data_ref)
8309 = build_aligned_type (TREE_TYPE (data_ref),
8310 TYPE_ALIGN (elem_type));
8311 }
272c6793 8312 break;
be1ac4ec 8313 }
272c6793 8314 case dr_explicit_realign:
267d3070 8315 {
272c6793 8316 tree ptr, bump;
272c6793 8317
d88981fc 8318 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8319
8320 if (compute_in_loop)
8321 msq = vect_setup_realignment (first_stmt, gsi,
8322 &realignment_token,
8323 dr_explicit_realign,
8324 dataref_ptr, NULL);
8325
aed93b23
RB
8326 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8327 ptr = copy_ssa_name (dataref_ptr);
8328 else
8329 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8330 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8331 new_stmt = gimple_build_assign
8332 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8333 build_int_cst
8334 (TREE_TYPE (dataref_ptr),
f702e7d4 8335 -(HOST_WIDE_INT) align));
272c6793
RS
8336 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8337 data_ref
8338 = build2 (MEM_REF, vectype, ptr,
44fc7854 8339 build_int_cst (ref_type, 0));
19986382 8340 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8341 vec_dest = vect_create_destination_var (scalar_dest,
8342 vectype);
8343 new_stmt = gimple_build_assign (vec_dest, data_ref);
8344 new_temp = make_ssa_name (vec_dest, new_stmt);
8345 gimple_assign_set_lhs (new_stmt, new_temp);
8346 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8347 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8348 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8349 msq = new_temp;
8350
d88981fc 8351 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8352 TYPE_SIZE_UNIT (elem_type));
d88981fc 8353 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8354 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8355 new_stmt = gimple_build_assign
8356 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8357 build_int_cst
f702e7d4 8358 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8359 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8360 gimple_assign_set_lhs (new_stmt, ptr);
8361 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8362 data_ref
8363 = build2 (MEM_REF, vectype, ptr,
44fc7854 8364 build_int_cst (ref_type, 0));
272c6793 8365 break;
267d3070 8366 }
272c6793 8367 case dr_explicit_realign_optimized:
f702e7d4
RS
8368 {
8369 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8370 new_temp = copy_ssa_name (dataref_ptr);
8371 else
8372 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8373 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8374 new_stmt = gimple_build_assign
8375 (new_temp, BIT_AND_EXPR, dataref_ptr,
8376 build_int_cst (TREE_TYPE (dataref_ptr),
8377 -(HOST_WIDE_INT) align));
8378 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8379 data_ref
8380 = build2 (MEM_REF, vectype, new_temp,
8381 build_int_cst (ref_type, 0));
8382 break;
8383 }
272c6793
RS
8384 default:
8385 gcc_unreachable ();
8386 }
ebfd146a 8387 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8388 /* DATA_REF is null if we've already built the statement. */
8389 if (data_ref)
19986382
RB
8390 {
8391 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8392 new_stmt = gimple_build_assign (vec_dest, data_ref);
8393 }
ebfd146a 8394 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8395 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8396 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8397
272c6793
RS
8398 /* 3. Handle explicit realignment if necessary/supported.
8399 Create in loop:
8400 vec_dest = realign_load (msq, lsq, realignment_token) */
8401 if (alignment_support_scheme == dr_explicit_realign_optimized
8402 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8403 {
272c6793
RS
8404 lsq = gimple_assign_lhs (new_stmt);
8405 if (!realignment_token)
8406 realignment_token = dataref_ptr;
8407 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8408 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8409 msq, lsq, realignment_token);
272c6793
RS
8410 new_temp = make_ssa_name (vec_dest, new_stmt);
8411 gimple_assign_set_lhs (new_stmt, new_temp);
8412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8413
8414 if (alignment_support_scheme == dr_explicit_realign_optimized)
8415 {
8416 gcc_assert (phi);
8417 if (i == vec_num - 1 && j == ncopies - 1)
8418 add_phi_arg (phi, lsq,
8419 loop_latch_edge (containing_loop),
9e227d60 8420 UNKNOWN_LOCATION);
272c6793
RS
8421 msq = lsq;
8422 }
ebfd146a 8423 }
ebfd146a 8424
59fd17e3
RB
8425 /* 4. Handle invariant-load. */
8426 if (inv_p && !bb_vinfo)
8427 {
59fd17e3 8428 gcc_assert (!grouped_load);
d1417442
JJ
8429 /* If we have versioned for aliasing or the loop doesn't
8430 have any data dependencies that would preclude this,
8431 then we are sure this is a loop invariant load and
8432 thus we can insert it on the preheader edge. */
8433 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8434 && !nested_in_vect_loop
6b916b36 8435 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8436 {
8437 if (dump_enabled_p ())
8438 {
8439 dump_printf_loc (MSG_NOTE, vect_location,
8440 "hoisting out of the vectorized "
8441 "loop: ");
8442 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8443 }
b731b390 8444 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8445 gsi_insert_on_edge_immediate
8446 (loop_preheader_edge (loop),
8447 gimple_build_assign (tem,
8448 unshare_expr
8449 (gimple_assign_rhs1 (stmt))));
8450 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
8451 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8452 set_vinfo_for_stmt (new_stmt,
8453 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
8454 }
8455 else
8456 {
8457 gimple_stmt_iterator gsi2 = *gsi;
8458 gsi_next (&gsi2);
8459 new_temp = vect_init_vector (stmt, scalar_dest,
8460 vectype, &gsi2);
34cd48e5 8461 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8462 }
59fd17e3
RB
8463 }
8464
62da9e14 8465 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8466 {
aec7ae7d
JJ
8467 tree perm_mask = perm_mask_for_reverse (vectype);
8468 new_temp = permute_vec_elements (new_temp, new_temp,
8469 perm_mask, stmt, gsi);
ebfd146a
IR
8470 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8471 }
267d3070 8472
272c6793 8473 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8474 vect_transform_grouped_load (). */
8475 if (grouped_load || slp_perm)
9771b263 8476 dr_chain.quick_push (new_temp);
267d3070 8477
272c6793
RS
8478 /* Store vector loads in the corresponding SLP_NODE. */
8479 if (slp && !slp_perm)
9771b263 8480 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8481
8482 /* With SLP permutation we load the gaps as well; without
8483 it we need to skip the gaps once we have fully loaded
2c53b149 8484 all elements. group_gap_adj is DR_GROUP_SIZE here. */
b267968e 8485 group_elt += nunits;
d9f21f6a
RS
8486 if (maybe_ne (group_gap_adj, 0U)
8487 && !slp_perm
8488 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8489 {
d9f21f6a
RS
8490 poly_wide_int bump_val
8491 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8492 * group_gap_adj);
8e6cdc90 8493 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8494 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8495 stmt, bump);
8496 group_elt = 0;
8497 }
272c6793 8498 }
9b999e8c
RB
8499 /* Bump the vector pointer to account for a gap or for excess
8500 elements loaded for a permuted SLP load. */
d9f21f6a 8501 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8502 {
d9f21f6a
RS
8503 poly_wide_int bump_val
8504 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8505 * group_gap_adj);
8e6cdc90 8506 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8507 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8508 stmt, bump);
8509 }
ebfd146a
IR
8510 }
8511
8512 if (slp && !slp_perm)
8513 continue;
8514
8515 if (slp_perm)
8516 {
29afecdf 8517 unsigned n_perms;
01d8bf07 8518 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8519 slp_node_instance, false,
8520 &n_perms))
ebfd146a 8521 {
9771b263 8522 dr_chain.release ();
ebfd146a
IR
8523 return false;
8524 }
8525 }
8526 else
8527 {
0d0293ac 8528 if (grouped_load)
ebfd146a 8529 {
2de001ee 8530 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8531 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8532 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8533 }
8534 else
8535 {
8536 if (j == 0)
8537 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8538 else
8539 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8540 prev_stmt_info = vinfo_for_stmt (new_stmt);
8541 }
8542 }
9771b263 8543 dr_chain.release ();
ebfd146a
IR
8544 }
8545
ebfd146a
IR
8546 return true;
8547}
8548
8549/* Function vect_is_simple_cond.
b8698a0f 8550
ebfd146a
IR
8551 Input:
8552 LOOP - the loop that is being vectorized.
8553 COND - Condition that is checked for simple use.
8554
e9e1d143
RG
8555 Output:
8556 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8557 *DTS - The def types for the arguments of the comparison
e9e1d143 8558
ebfd146a
IR
8559 Returns whether a COND can be vectorized. Checks whether
8560 condition operands are supportable using vect_is_simple_use. */
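/* Illustrative forms (names assumed for the example only): COND is
   either a boolean SSA_NAME used directly as a mask, e.g. _5 with a
   scalar boolean type, or a comparison such as x_1 < y_2 where either
   operand may also be an integer, real or fixed-point constant, which
   is the invariant-comparison case handled below.  */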
8561
87aab9b2 8562static bool
4fc5ebf1 8563vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8564 tree *comp_vectype, enum vect_def_type *dts,
8565 tree vectype)
ebfd146a
IR
8566{
8567 tree lhs, rhs;
e9e1d143 8568 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8569
a414c77f
IE
8570 /* Mask case. */
8571 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8572 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f 8573 {
894dd753 8574 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
a414c77f
IE
8575 || !*comp_vectype
8576 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8577 return false;
8578 return true;
8579 }
8580
ebfd146a
IR
8581 if (!COMPARISON_CLASS_P (cond))
8582 return false;
8583
8584 lhs = TREE_OPERAND (cond, 0);
8585 rhs = TREE_OPERAND (cond, 1);
8586
8587 if (TREE_CODE (lhs) == SSA_NAME)
8588 {
894dd753 8589 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
ebfd146a
IR
8590 return false;
8591 }
4fc5ebf1
JG
8592 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8593 || TREE_CODE (lhs) == FIXED_CST)
8594 dts[0] = vect_constant_def;
8595 else
ebfd146a
IR
8596 return false;
8597
8598 if (TREE_CODE (rhs) == SSA_NAME)
8599 {
894dd753 8600 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
ebfd146a
IR
8601 return false;
8602 }
4fc5ebf1
JG
8603 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8604 || TREE_CODE (rhs) == FIXED_CST)
8605 dts[1] = vect_constant_def;
8606 else
ebfd146a
IR
8607 return false;
8608
28b33016 8609 if (vectype1 && vectype2
928686b1
RS
8610 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8611 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8612 return false;
8613
e9e1d143 8614 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8615 /* Invariant comparison. */
4515e413 8616 if (! *comp_vectype && vectype)
8da4c8d8
RB
8617 {
8618 tree scalar_type = TREE_TYPE (lhs);
8619 /* If we can widen the comparison to match vectype do so. */
8620 if (INTEGRAL_TYPE_P (scalar_type)
8621 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8622 TYPE_SIZE (TREE_TYPE (vectype))))
8623 scalar_type = build_nonstandard_integer_type
8624 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8625 TYPE_UNSIGNED (scalar_type));
8626 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8627 }
8628
ebfd146a
IR
8629 return true;
8630}
8631
8632/* vectorizable_condition.
8633
b8698a0f
L
8634 Check if STMT is a conditional modify expression that can be vectorized.
8635 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8636 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8637 at GSI.
8638
8639 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8640 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
0ad23163 8641 the else clause if it is 2).
ebfd146a
IR
8642
8643 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8644
4bbe8262 8645bool
355fe088
TS
8646vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8647 gimple **vec_stmt, tree reduc_def, int reduc_index,
68435eb2 8648 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8649{
8650 tree scalar_dest = NULL_TREE;
8651 tree vec_dest = NULL_TREE;
01216d27
JJ
8652 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8653 tree then_clause, else_clause;
ebfd146a 8654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8655 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8656 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8657 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8658 tree vec_compare;
ebfd146a
IR
8659 tree new_temp;
8660 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8661 enum vect_def_type dts[4]
8662 = {vect_unknown_def_type, vect_unknown_def_type,
8663 vect_unknown_def_type, vect_unknown_def_type};
8664 int ndts = 4;
f7e531cf 8665 int ncopies;
01216d27 8666 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8667 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8668 int i, j;
8669 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8670 vec<tree> vec_oprnds0 = vNULL;
8671 vec<tree> vec_oprnds1 = vNULL;
8672 vec<tree> vec_oprnds2 = vNULL;
8673 vec<tree> vec_oprnds3 = vNULL;
74946978 8674 tree vec_cmp_type;
a414c77f 8675 bool masked = false;
b8698a0f 8676
f7e531cf
IR
8677 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8678 return false;
8679
bb6c2b68
RS
8680 vect_reduction_type reduction_type
8681 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8682 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8683 {
8684 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8685 return false;
ebfd146a 8686
af29617a
AH
8687 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8688 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8689 && reduc_def))
8690 return false;
ebfd146a 8691
af29617a
AH
8692 /* FORNOW: not yet supported. */
8693 if (STMT_VINFO_LIVE_P (stmt_info))
8694 {
8695 if (dump_enabled_p ())
8696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8697 "value used after loop.\n");
8698 return false;
8699 }
ebfd146a
IR
8700 }
8701
8702 /* Is vectorizable conditional operation? */
8703 if (!is_gimple_assign (stmt))
8704 return false;
8705
8706 code = gimple_assign_rhs_code (stmt);
8707
8708 if (code != COND_EXPR)
8709 return false;
8710
465c8c19 8711 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8712 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8713
fce57248 8714 if (slp_node)
465c8c19
JJ
8715 ncopies = 1;
8716 else
e8f142e2 8717 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8718
8719 gcc_assert (ncopies >= 1);
8720 if (reduc_index && ncopies > 1)
8721 return false; /* FORNOW */
8722
4e71066d
RG
8723 cond_expr = gimple_assign_rhs1 (stmt);
8724 then_clause = gimple_assign_rhs2 (stmt);
8725 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8726
4fc5ebf1 8727 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8728 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8729 || !comp_vectype)
ebfd146a
IR
8730 return false;
8731
894dd753 8732 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
2947d3b2 8733 return false;
894dd753 8734 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
ebfd146a 8735 return false;
2947d3b2
IE
8736
8737 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8738 return false;
8739
8740 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8741 return false;
8742
28b33016
IE
8743 masked = !COMPARISON_CLASS_P (cond_expr);
8744 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8745
74946978
MP
8746 if (vec_cmp_type == NULL_TREE)
8747 return false;
784fb9b3 8748
01216d27
JJ
8749 cond_code = TREE_CODE (cond_expr);
8750 if (!masked)
8751 {
8752 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8753 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8754 }
8755
8756 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8757 {
8758 /* Boolean values may have another representation in vectors
8759 and therefore we prefer bit operations over comparison for
8760 them (which also works for scalar masks). We store opcodes
8761 to use in bitop1 and bitop2. Statement is vectorized as
8762 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8763 depending on bitop1 and bitop2 arity. */
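/* Illustrative identities behind the mappings below: on boolean values
     a >  b  ==  a & ~b        a >= b  ==  a | ~b
     a != b  ==  a ^ b         a == b  ==  ~(a ^ b)
   LT and LE are derived from GT and GE by swapping the operands, and
   for EQ the trailing BIT_NOT is avoided later by swapping the
   then/else arms of the COND_EXPR instead.  */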
8764 switch (cond_code)
8765 {
8766 case GT_EXPR:
8767 bitop1 = BIT_NOT_EXPR;
8768 bitop2 = BIT_AND_EXPR;
8769 break;
8770 case GE_EXPR:
8771 bitop1 = BIT_NOT_EXPR;
8772 bitop2 = BIT_IOR_EXPR;
8773 break;
8774 case LT_EXPR:
8775 bitop1 = BIT_NOT_EXPR;
8776 bitop2 = BIT_AND_EXPR;
8777 std::swap (cond_expr0, cond_expr1);
8778 break;
8779 case LE_EXPR:
8780 bitop1 = BIT_NOT_EXPR;
8781 bitop2 = BIT_IOR_EXPR;
8782 std::swap (cond_expr0, cond_expr1);
8783 break;
8784 case NE_EXPR:
8785 bitop1 = BIT_XOR_EXPR;
8786 break;
8787 case EQ_EXPR:
8788 bitop1 = BIT_XOR_EXPR;
8789 bitop2 = BIT_NOT_EXPR;
8790 break;
8791 default:
8792 return false;
8793 }
8794 cond_code = SSA_NAME;
8795 }
8796
b8698a0f 8797 if (!vec_stmt)
ebfd146a 8798 {
01216d27
JJ
8799 if (bitop1 != NOP_EXPR)
8800 {
8801 machine_mode mode = TYPE_MODE (comp_vectype);
8802 optab optab;
8803
8804 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8805 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8806 return false;
8807
8808 if (bitop2 != NOP_EXPR)
8809 {
8810 optab = optab_for_tree_code (bitop2, comp_vectype,
8811 optab_default);
8812 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8813 return false;
8814 }
8815 }
4fc5ebf1
JG
8816 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8817 cond_code))
8818 {
68435eb2
RB
8819 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8820 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8821 cost_vec);
4fc5ebf1
JG
8822 return true;
8823 }
8824 return false;
ebfd146a
IR
8825 }
8826
f7e531cf
IR
8827 /* Transform. */
8828
8829 if (!slp_node)
8830 {
9771b263
DN
8831 vec_oprnds0.create (1);
8832 vec_oprnds1.create (1);
8833 vec_oprnds2.create (1);
8834 vec_oprnds3.create (1);
f7e531cf 8835 }
ebfd146a
IR
8836
8837 /* Handle def. */
8838 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8839 if (reduction_type != EXTRACT_LAST_REDUCTION)
8840 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8841
8842 /* Handle cond expr. */
a855b1b1
MM
8843 for (j = 0; j < ncopies; j++)
8844 {
bb6c2b68 8845 gimple *new_stmt = NULL;
a855b1b1
MM
8846 if (j == 0)
8847 {
f7e531cf
IR
8848 if (slp_node)
8849 {
00f96dc9
TS
8850 auto_vec<tree, 4> ops;
8851 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8852
a414c77f 8853 if (masked)
01216d27 8854 ops.safe_push (cond_expr);
a414c77f
IE
8855 else
8856 {
01216d27
JJ
8857 ops.safe_push (cond_expr0);
8858 ops.safe_push (cond_expr1);
a414c77f 8859 }
9771b263
DN
8860 ops.safe_push (then_clause);
8861 ops.safe_push (else_clause);
306b0c92 8862 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8863 vec_oprnds3 = vec_defs.pop ();
8864 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8865 if (!masked)
8866 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8867 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8868 }
8869 else
8870 {
a414c77f
IE
8871 if (masked)
8872 {
8873 vec_cond_lhs
8874 = vect_get_vec_def_for_operand (cond_expr, stmt,
8875 comp_vectype);
894dd753 8876 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
a414c77f
IE
8877 }
8878 else
8879 {
01216d27
JJ
8880 vec_cond_lhs
8881 = vect_get_vec_def_for_operand (cond_expr0,
8882 stmt, comp_vectype);
894dd753 8883 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
01216d27
JJ
8884
8885 vec_cond_rhs
8886 = vect_get_vec_def_for_operand (cond_expr1,
8887 stmt, comp_vectype);
894dd753 8888 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
a414c77f 8889 }
f7e531cf
IR
8890 if (reduc_index == 1)
8891 vec_then_clause = reduc_def;
8892 else
8893 {
8894 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241 8895 stmt);
894dd753 8896 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
f7e531cf
IR
8897 }
8898 if (reduc_index == 2)
8899 vec_else_clause = reduc_def;
8900 else
8901 {
8902 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241 8903 stmt);
894dd753 8904 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
f7e531cf 8905 }
a855b1b1
MM
8906 }
8907 }
8908 else
8909 {
a414c77f
IE
8910 vec_cond_lhs
8911 = vect_get_vec_def_for_stmt_copy (dts[0],
8912 vec_oprnds0.pop ());
8913 if (!masked)
8914 vec_cond_rhs
8915 = vect_get_vec_def_for_stmt_copy (dts[1],
8916 vec_oprnds1.pop ());
8917
a855b1b1 8918 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8919 vec_oprnds2.pop ());
a855b1b1 8920 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8921 vec_oprnds3.pop ());
f7e531cf
IR
8922 }
8923
8924 if (!slp_node)
8925 {
9771b263 8926 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8927 if (!masked)
8928 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8929 vec_oprnds2.quick_push (vec_then_clause);
8930 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8931 }
8932
9dc3f7de 8933 /* Arguments are ready. Create the new vector stmt. */
9771b263 8934 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8935 {
9771b263
DN
8936 vec_then_clause = vec_oprnds2[i];
8937 vec_else_clause = vec_oprnds3[i];
a855b1b1 8938
a414c77f
IE
8939 if (masked)
8940 vec_compare = vec_cond_lhs;
8941 else
8942 {
8943 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8944 if (bitop1 == NOP_EXPR)
8945 vec_compare = build2 (cond_code, vec_cmp_type,
8946 vec_cond_lhs, vec_cond_rhs);
8947 else
8948 {
8949 new_temp = make_ssa_name (vec_cmp_type);
8950 if (bitop1 == BIT_NOT_EXPR)
8951 new_stmt = gimple_build_assign (new_temp, bitop1,
8952 vec_cond_rhs);
8953 else
8954 new_stmt
8955 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8956 vec_cond_rhs);
8957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8958 if (bitop2 == NOP_EXPR)
8959 vec_compare = new_temp;
8960 else if (bitop2 == BIT_NOT_EXPR)
8961 {
8962 /* Instead of doing ~x ? y : z do x ? z : y. */
8963 vec_compare = new_temp;
8964 std::swap (vec_then_clause, vec_else_clause);
8965 }
8966 else
8967 {
8968 vec_compare = make_ssa_name (vec_cmp_type);
8969 new_stmt
8970 = gimple_build_assign (vec_compare, bitop2,
8971 vec_cond_lhs, new_temp);
8972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8973 }
8974 }
a414c77f 8975 }
bb6c2b68
RS
8976 if (reduction_type == EXTRACT_LAST_REDUCTION)
8977 {
8978 if (!is_gimple_val (vec_compare))
8979 {
8980 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8981 new_stmt = gimple_build_assign (vec_compare_name,
8982 vec_compare);
8983 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8984 vec_compare = vec_compare_name;
8985 }
8986 gcc_assert (reduc_index == 2);
8987 new_stmt = gimple_build_call_internal
8988 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8989 vec_then_clause);
8990 gimple_call_set_lhs (new_stmt, scalar_dest);
8991 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8992 if (stmt == gsi_stmt (*gsi))
8993 vect_finish_replace_stmt (stmt, new_stmt);
8994 else
8995 {
8996 /* In this case we're moving the definition to later in the
8997 block. That doesn't matter because the only uses of the
8998 lhs are in phi statements. */
8999 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9000 gsi_remove (&old_gsi, true);
9001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9002 }
9003 }
9004 else
9005 {
9006 new_temp = make_ssa_name (vec_dest);
9007 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9008 vec_compare, vec_then_clause,
9009 vec_else_clause);
9010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9011 }
f7e531cf 9012 if (slp_node)
9771b263 9013 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
9014 }
9015
9016 if (slp_node)
9017 continue;
9018
9019 if (j == 0)
9020 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9021 else
9022 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9023
9024 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 9025 }
b8698a0f 9026
9771b263
DN
9027 vec_oprnds0.release ();
9028 vec_oprnds1.release ();
9029 vec_oprnds2.release ();
9030 vec_oprnds3.release ();
f7e531cf 9031
ebfd146a
IR
9032 return true;
9033}
9034
42fd8198
IE
9035/* vectorizable_comparison.
9036
9037 Check if STMT is a comparison expression that can be vectorized.
9038 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9039 comparison, put it in VEC_STMT, and insert it at GSI.
9040
9041 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9042
fce57248 9043static bool
42fd8198
IE
9044vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9045 gimple **vec_stmt, tree reduc_def,
68435eb2 9046 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9047{
9048 tree lhs, rhs1, rhs2;
9049 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9050 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9051 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9052 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9053 tree new_temp;
9054 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9055 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9056 int ndts = 2;
928686b1 9057 poly_uint64 nunits;
42fd8198 9058 int ncopies;
49e76ff1 9059 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9060 stmt_vec_info prev_stmt_info = NULL;
9061 int i, j;
9062 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9063 vec<tree> vec_oprnds0 = vNULL;
9064 vec<tree> vec_oprnds1 = vNULL;
42fd8198
IE
9065 tree mask_type;
9066 tree mask;
9067
c245362b
IE
9068 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9069 return false;
9070
30480bcd 9071 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9072 return false;
9073
9074 mask_type = vectype;
9075 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9076
fce57248 9077 if (slp_node)
42fd8198
IE
9078 ncopies = 1;
9079 else
e8f142e2 9080 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9081
9082 gcc_assert (ncopies >= 1);
42fd8198
IE
9083 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9084 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9085 && reduc_def))
9086 return false;
9087
9088 if (STMT_VINFO_LIVE_P (stmt_info))
9089 {
9090 if (dump_enabled_p ())
9091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9092 "value used after loop.\n");
9093 return false;
9094 }
9095
9096 if (!is_gimple_assign (stmt))
9097 return false;
9098
9099 code = gimple_assign_rhs_code (stmt);
9100
9101 if (TREE_CODE_CLASS (code) != tcc_comparison)
9102 return false;
9103
9104 rhs1 = gimple_assign_rhs1 (stmt);
9105 rhs2 = gimple_assign_rhs2 (stmt);
9106
894dd753 9107 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
42fd8198
IE
9108 return false;
9109
894dd753 9110 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
42fd8198
IE
9111 return false;
9112
9113 if (vectype1 && vectype2
928686b1
RS
9114 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9115 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9116 return false;
9117
9118 vectype = vectype1 ? vectype1 : vectype2;
9119
9120 /* Invariant comparison. */
9121 if (!vectype)
9122 {
69a9a66f 9123 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9124 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9125 return false;
9126 }
928686b1 9127 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9128 return false;
9129
49e76ff1
IE
9130 /* Can't compare mask and non-mask types. */
9131 if (vectype1 && vectype2
9132 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9133 return false;
9134
9135 /* Boolean values may have another representation in vectors
9136 and therefore we prefer bit operations over comparison for
9137 them (which also works for scalar masks). We store opcodes
9138 to use in bitop1 and bitop2. Statement is vectorized as
9139 BITOP2 (rhs1 BITOP1 rhs2) or
9140 rhs1 BITOP2 (BITOP1 rhs2)
9141 depending on bitop1 and bitop2 arity. */
9142 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9143 {
9144 if (code == GT_EXPR)
9145 {
9146 bitop1 = BIT_NOT_EXPR;
9147 bitop2 = BIT_AND_EXPR;
9148 }
9149 else if (code == GE_EXPR)
9150 {
9151 bitop1 = BIT_NOT_EXPR;
9152 bitop2 = BIT_IOR_EXPR;
9153 }
9154 else if (code == LT_EXPR)
9155 {
9156 bitop1 = BIT_NOT_EXPR;
9157 bitop2 = BIT_AND_EXPR;
9158 std::swap (rhs1, rhs2);
264d951a 9159 std::swap (dts[0], dts[1]);
49e76ff1
IE
9160 }
9161 else if (code == LE_EXPR)
9162 {
9163 bitop1 = BIT_NOT_EXPR;
9164 bitop2 = BIT_IOR_EXPR;
9165 std::swap (rhs1, rhs2);
264d951a 9166 std::swap (dts[0], dts[1]);
49e76ff1
IE
9167 }
9168 else
9169 {
9170 bitop1 = BIT_XOR_EXPR;
9171 if (code == EQ_EXPR)
9172 bitop2 = BIT_NOT_EXPR;
9173 }
9174 }
9175
42fd8198
IE
9176 if (!vec_stmt)
9177 {
49e76ff1 9178 if (bitop1 == NOP_EXPR)
68435eb2
RB
9179 {
9180 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9181 return false;
9182 }
49e76ff1
IE
9183 else
9184 {
9185 machine_mode mode = TYPE_MODE (vectype);
9186 optab optab;
9187
9188 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9189 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9190 return false;
9191
9192 if (bitop2 != NOP_EXPR)
9193 {
9194 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9195 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9196 return false;
9197 }
49e76ff1 9198 }
68435eb2
RB
9199
9200 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9201 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9202 dts, ndts, slp_node, cost_vec);
9203 return true;
42fd8198
IE
9204 }
9205
9206 /* Transform. */
9207 if (!slp_node)
9208 {
9209 vec_oprnds0.create (1);
9210 vec_oprnds1.create (1);
9211 }
9212
9213 /* Handle def. */
9214 lhs = gimple_assign_lhs (stmt);
9215 mask = vect_create_destination_var (lhs, mask_type);
9216
9217 /* Handle cmp expr. */
9218 for (j = 0; j < ncopies; j++)
9219 {
9220 gassign *new_stmt = NULL;
9221 if (j == 0)
9222 {
9223 if (slp_node)
9224 {
9225 auto_vec<tree, 2> ops;
9226 auto_vec<vec<tree>, 2> vec_defs;
9227
9228 ops.safe_push (rhs1);
9229 ops.safe_push (rhs2);
306b0c92 9230 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9231 vec_oprnds1 = vec_defs.pop ();
9232 vec_oprnds0 = vec_defs.pop ();
9233 }
9234 else
9235 {
e4af0bc4
IE
9236 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9237 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9238 }
9239 }
9240 else
9241 {
9242 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9243 vec_oprnds0.pop ());
9244 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9245 vec_oprnds1.pop ());
9246 }
9247
9248 if (!slp_node)
9249 {
9250 vec_oprnds0.quick_push (vec_rhs1);
9251 vec_oprnds1.quick_push (vec_rhs2);
9252 }
9253
9254 /* Arguments are ready. Create the new vector stmt. */
9255 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9256 {
9257 vec_rhs2 = vec_oprnds1[i];
9258
9259 new_temp = make_ssa_name (mask);
49e76ff1
IE
9260 if (bitop1 == NOP_EXPR)
9261 {
9262 new_stmt = gimple_build_assign (new_temp, code,
9263 vec_rhs1, vec_rhs2);
9264 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9265 }
9266 else
9267 {
9268 if (bitop1 == BIT_NOT_EXPR)
9269 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9270 else
9271 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9272 vec_rhs2);
9273 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9274 if (bitop2 != NOP_EXPR)
9275 {
9276 tree res = make_ssa_name (mask);
9277 if (bitop2 == BIT_NOT_EXPR)
9278 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9279 else
9280 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9281 new_temp);
9282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9283 }
9284 }
42fd8198
IE
9285 if (slp_node)
9286 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9287 }
9288
9289 if (slp_node)
9290 continue;
9291
9292 if (j == 0)
9293 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9294 else
9295 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9296
9297 prev_stmt_info = vinfo_for_stmt (new_stmt);
9298 }
9299
9300 vec_oprnds0.release ();
9301 vec_oprnds1.release ();
9302
9303 return true;
9304}
ebfd146a 9305
68a0f2ff
RS
9306/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9307 can handle all live statements in the node. Otherwise return true
9308 if STMT is not live or if vectorizable_live_operation can handle it.
9309 GSI and VEC_STMT are as for vectorizable_live_operation. */
9310
9311static bool
9312can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
68435eb2
RB
9313 slp_tree slp_node, gimple **vec_stmt,
9314 stmt_vector_for_cost *cost_vec)
68a0f2ff
RS
9315{
9316 if (slp_node)
9317 {
9318 gimple *slp_stmt;
9319 unsigned int i;
9320 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9321 {
9322 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9323 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9324 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
68435eb2 9325 vec_stmt, cost_vec))
68a0f2ff
RS
9326 return false;
9327 }
9328 }
9329 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
68435eb2
RB
9330 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9331 cost_vec))
68a0f2ff
RS
9332 return false;
9333
9334 return true;
9335}
9336
8644a673 9337/* Make sure the statement is vectorizable. */
ebfd146a
IR
9338
9339bool
891ad31c 9340vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9341 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9342{
8644a673 9343 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 9344 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9345 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9346 bool ok;
355fe088 9347 gimple *pattern_stmt;
363477c0 9348 gimple_seq pattern_def_seq;
ebfd146a 9349
73fbfcad 9350 if (dump_enabled_p ())
ebfd146a 9351 {
78c60e3d
SS
9352 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9353 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9354 }
ebfd146a 9355
1825a1f3 9356 if (gimple_has_volatile_ops (stmt))
b8698a0f 9357 {
73fbfcad 9358 if (dump_enabled_p ())
78c60e3d 9359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9360 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9361
9362 return false;
9363 }
b8698a0f 9364
d54a098e
RS
9365 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9366 && node == NULL
9367 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9368 {
9369 gimple_stmt_iterator si;
9370
9371 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9372 {
9373 gimple *pattern_def_stmt = gsi_stmt (si);
9374 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9375 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9376 {
9377 /* Analyze def stmt of STMT if it's a pattern stmt. */
9378 if (dump_enabled_p ())
9379 {
9380 dump_printf_loc (MSG_NOTE, vect_location,
9381 "==> examining pattern def statement: ");
9382 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9383 }
9384
9385 if (!vect_analyze_stmt (pattern_def_stmt,
9386 need_to_vectorize, node, node_instance,
9387 cost_vec))
9388 return false;
9389 }
9390 }
9391 }
9392
b8698a0f 9393 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9394 to include:
9395 - the COND_EXPR which is the loop exit condition
9396 - any LABEL_EXPRs in the loop
b8698a0f 9397 - computations that are used only for array indexing or loop control.
8644a673 9398 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9399 instance; therefore, all the statements are relevant.
ebfd146a 9400
d092494c 9401 The pattern statement needs to be analyzed instead of the original statement
83197f37 9402 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9403 statements. In basic blocks we are called from some SLP instance
9404 traversal; in that case we don't analyze the pattern stmts separately,
9405 since they will already be part of an SLP instance. */
83197f37
IR
9406
9407 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9408 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9409 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9410 {
9d5e7640 9411 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9412 && pattern_stmt
9d5e7640
IR
9413 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9414 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9415 {
83197f37 9416 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9417 stmt = pattern_stmt;
9418 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9419 if (dump_enabled_p ())
9d5e7640 9420 {
78c60e3d
SS
9421 dump_printf_loc (MSG_NOTE, vect_location,
9422 "==> examining pattern statement: ");
9423 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9424 }
9425 }
9426 else
9427 {
73fbfcad 9428 if (dump_enabled_p ())
e645e942 9429 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9430
9d5e7640
IR
9431 return true;
9432 }
8644a673 9433 }
83197f37 9434 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9435 && node == NULL
83197f37
IR
9436 && pattern_stmt
9437 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9438 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9439 {
9440 /* Analyze PATTERN_STMT too. */
73fbfcad 9441 if (dump_enabled_p ())
83197f37 9442 {
78c60e3d
SS
9443 dump_printf_loc (MSG_NOTE, vect_location,
9444 "==> examining pattern statement: ");
9445 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9446 }
9447
891ad31c 9448 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
68435eb2 9449 node_instance, cost_vec))
83197f37
IR
9450 return false;
9451 }
ebfd146a 9452
8644a673
IR
9453 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9454 {
9455 case vect_internal_def:
9456 break;
ebfd146a 9457
8644a673 9458 case vect_reduction_def:
7c5222ff 9459 case vect_nested_cycle:
14a61437
RB
9460 gcc_assert (!bb_vinfo
9461 && (relevance == vect_used_in_outer
9462 || relevance == vect_used_in_outer_by_reduction
9463 || relevance == vect_used_by_reduction
b28ead45
AH
9464 || relevance == vect_unused_in_scope
9465 || relevance == vect_used_only_live));
8644a673
IR
9466 break;
9467
9468 case vect_induction_def:
e7baeb39
RB
9469 gcc_assert (!bb_vinfo);
9470 break;
9471
8644a673
IR
9472 case vect_constant_def:
9473 case vect_external_def:
9474 case vect_unknown_def_type:
9475 default:
9476 gcc_unreachable ();
9477 }
ebfd146a 9478
8644a673 9479 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9480 {
8644a673 9481 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0 9482      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9483 || (is_gimple_call (stmt)
9484 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9485 *need_to_vectorize = true;
ebfd146a 9486    }
9487
b1af7da6 9488  if (PURE_SLP_STMT (stmt_info) && !node)
9489 {
9490 dump_printf_loc (MSG_NOTE, vect_location,
9491 "handled only by SLP analysis\n");
9492 return true;
9493 }
9494
9495 ok = true;
9496 if (!bb_vinfo
9497 && (STMT_VINFO_RELEVANT_P (stmt_info)
9498 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
68435eb2 9499    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9500 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9501 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9502 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9503 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9504 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9505 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9506 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9507 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9508 cost_vec)
9509 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9510 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9511 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
b1af7da6 9512  else
9513 {
9514 if (bb_vinfo)
68435eb2 9515	ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9516 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9517 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9518 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9519 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9520 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9521 cost_vec)
9522 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9523 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9524 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9525 cost_vec)
9526 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9527 cost_vec));
b1af7da6 9528 }
8644a673 9529
9530 if (!ok)
ebfd146a 9531 {
73fbfcad 9532 if (dump_enabled_p ())
8644a673 9533 {
78c60e3d 9534	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9535 "not vectorized: relevant stmt not ");
9536 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9537 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9538 }
b8698a0f 9539
ebfd146a 9540      return false;
9541 }
9542
8644a673 9543  /* Stmts that are (also) "live" (i.e. used out of the loop)
9544     need extra handling, except for vectorizable reductions.  */
68435eb2 9545  if (!bb_vinfo
9546 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9547 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9548 {
73fbfcad 9549 if (dump_enabled_p ())
8644a673 9550 {
78c60e3d 9551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9552 "not vectorized: live stmt not supported: ");
78c60e3d 9553 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9554 }
b8698a0f 9555
8644a673 9556 return false;
ebfd146a 9557    }
9558
ebfd146a 9559  return true;
9560}
9561
9562
9563/* Function vect_transform_stmt.
9564
9565 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9566
9567bool
355fe088 9568vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9569 bool *grouped_store, slp_tree slp_node,
ebfd146a 9570		     slp_instance slp_node_instance)
9571{
9572 bool is_store = false;
355fe088 9573 gimple *vec_stmt = NULL;
ebfd146a 9574 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 9575 bool done;
ebfd146a 9576
fce57248 9577 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9578 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9579
e57d9a82 9580  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9581 && nested_in_vect_loop_p
9582 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9583 stmt));
9584
ebfd146a 9585  switch (STMT_VINFO_TYPE (stmt_info))
9586 {
9587 case type_demotion_vec_info_type:
ebfd146a 9588 case type_promotion_vec_info_type:
ebfd146a 9589 case type_conversion_vec_info_type:
68435eb2 9590 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9591      gcc_assert (done);
9592 break;
9593
9594 case induc_vec_info_type:
68435eb2 9595 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9596      gcc_assert (done);
9597 break;
9598
9dc3f7de 9599 case shift_vec_info_type:
68435eb2 9600 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9dc3f7de 9601      gcc_assert (done);
9602 break;
9603
ebfd146a 9604 case op_vec_info_type:
68435eb2 9605 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9606      gcc_assert (done);
9607 break;
9608
9609 case assignment_vec_info_type:
68435eb2 9610 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9611      gcc_assert (done);
9612 break;
9613
9614 case load_vec_info_type:
b8698a0f 9615 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9616 slp_node_instance, NULL);
ebfd146a 9617      gcc_assert (done);
9618 break;
9619
9620 case store_vec_info_type:
68435eb2 9621 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9622 gcc_assert (done);
0d0293ac 9623 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a 9624	{
9625	  /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9626	     last store in the chain is reached.  Store stmts before the last
ebfd146a 9627	     one are skipped, and their stmt_vec_info shouldn't be freed
9628	     meanwhile.  */
0d0293ac 9629 *grouped_store = true;
f307441a 9630 stmt_vec_info group_info
2c53b149 9631	    = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9632 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
ebfd146a 9633 is_store = true;
f307441a 9634 }
ebfd146a 9635      else
9636 is_store = true;
9637 break;
9638
9639 case condition_vec_info_type:
68435eb2 9640 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
ebfd146a 9641      gcc_assert (done);
9642 break;
9643
42fd8198 9644 case comparison_vec_info_type:
68435eb2 9645 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
42fd8198 9646      gcc_assert (done);
9647 break;
9648
ebfd146a 9649 case call_vec_info_type:
68435eb2 9650 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9651 stmt = gsi_stmt (*gsi);
ebfd146a 9652      break;
9653
0136f8f0 9654 case call_simd_clone_vec_info_type:
68435eb2 9655 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
0136f8f0 9656      stmt = gsi_stmt (*gsi);
9657 break;
9658
ebfd146a 9659 case reduc_vec_info_type:
891ad31c 9660 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9661 slp_node_instance, NULL);
ebfd146a 9662      gcc_assert (done);
9663 break;
9664
9665 default:
9666 if (!STMT_VINFO_LIVE_P (stmt_info))
9667 {
73fbfcad 9668 if (dump_enabled_p ())
78c60e3d 9669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9670 "stmt not supported.\n");
ebfd146a 9671	  gcc_unreachable ();
9672 }
9673 }
9674
225ce44b 9675  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9676 This would break hybrid SLP vectorization. */
9677 if (slp_node)
d90f8440 9678    gcc_assert (!vec_stmt
9679 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9680
ebfd146a 9681  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9682 is being vectorized, but outside the immediately enclosing loop. */
9683 if (vec_stmt
e57d9a82 9684 && nested_p
ebfd146a 9685      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9686 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9687 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9688 vect_used_in_outer_by_reduction))
ebfd146a 9689 {
a70d6342 9690      struct loop *innerloop = LOOP_VINFO_LOOP (
9691	STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a 9692      imm_use_iterator imm_iter;
9693 use_operand_p use_p;
9694 tree scalar_dest;
355fe088 9695 gimple *exit_phi;
ebfd146a 9696
73fbfcad 9697 if (dump_enabled_p ())
78c60e3d 9698 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9699 "Record the vdef for outer-loop vectorization.\n");
ebfd146a 9700
9701      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9702         (to be used when vectorizing outer-loop stmts that use the DEF of
9703	 STMT).  */
9704 if (gimple_code (stmt) == GIMPLE_PHI)
9705 scalar_dest = PHI_RESULT (stmt);
9706 else
9707 scalar_dest = gimple_assign_lhs (stmt);
9708
9709 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9710 {
9711 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9712 {
9713 exit_phi = USE_STMT (use_p);
9714 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9715 }
9716 }
9717 }
9718
9719 /* Handle stmts whose DEF is used outside the loop-nest that is
9720 being vectorized. */
68a0f2ff 9721 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9722 {
68435eb2 9723 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
ebfd146a 9724      gcc_assert (done);
9725 }
9726
9727 if (vec_stmt)
83197f37 9728 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9729
b8698a0f 9730 return is_store;
ebfd146a 9731}
9732
9733
b8698a0f 9734/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a 9735   stmt_vec_info.  */
9736
9737void
355fe088 9738vect_remove_stores (gimple *first_stmt)
ebfd146a 9739{
355fe088 9740  gimple *next = first_stmt;
9741  gimple *tmp;
ebfd146a 9742  gimple_stmt_iterator next_si;
9743
9744 while (next)
9745 {
78048b1c 9746      stmt_vec_info stmt_info = vinfo_for_stmt (next);
9747
2c53b149 9748 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
78048b1c 9749      if (is_pattern_stmt_p (stmt_info))
9750 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a 9751      /* Free the attached stmt_vec_info and remove the stmt.  */
9752 next_si = gsi_for_stmt (next);
3d3f2249 9753 unlink_stmt_vdef (next);
ebfd146a 9754 gsi_remove (&next_si, true);
3d3f2249 9755 release_defs (next);
ebfd146a 9756      free_stmt_vec_info (next);
9757 next = tmp;
9758 }
9759}
9760
9761
9762/* Function new_stmt_vec_info.
9763
9764 Create and initialize a new stmt_vec_info struct for STMT. */
9765
9766stmt_vec_info
310213d4 9767new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9768{
9769 stmt_vec_info res;
9770 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9771
9772 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9773 STMT_VINFO_STMT (res) = stmt;
310213d4 9774 res->vinfo = vinfo;
8644a673 9775 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9776 STMT_VINFO_LIVE_P (res) = false;
9777 STMT_VINFO_VECTYPE (res) = NULL;
9778 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9779 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9780 STMT_VINFO_IN_PATTERN_P (res) = false;
9781 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9782 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9783 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9784 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9785 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9786
ebfd146a
IR
9787 if (gimple_code (stmt) == GIMPLE_PHI
9788 && is_loop_header_bb_p (gimple_bb (stmt)))
9789 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9790 else
8644a673
IR
9791 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9792
9771b263 9793 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9794 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9795 STMT_VINFO_NUM_SLP_USES (res) = 0;
9796
2c53b149
RB
9797 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9798 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9799 res->size = 0; /* GROUP_SIZE */
9800 res->store_count = 0; /* GROUP_STORE_COUNT */
9801 res->gap = 0; /* GROUP_GAP */
9802 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
ebfd146a 9803
ca823c85
RB
9804 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9805 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9806
ebfd146a
IR
9807 return res;
9808}
9809
9810
f8c0baaf 9811/* Set the current stmt_vec_info vector to V. */
ebfd146a
IR
9812
9813void
f8c0baaf 9814set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
ebfd146a 9815{
f8c0baaf 9816 stmt_vec_info_vec = v;
ebfd146a
IR
9817}
9818
f8c0baaf 9819/* Free the stmt_vec_info entries in V and release V. */
ebfd146a
IR
9820
9821void
f8c0baaf 9822free_stmt_vec_infos (vec<stmt_vec_info> *v)
ebfd146a 9823{
93675444 9824 unsigned int i;
3161455c 9825 stmt_vec_info info;
f8c0baaf 9826 FOR_EACH_VEC_ELT (*v, i, info)
93675444 9827 if (info != NULL)
3161455c 9828 free_stmt_vec_info (STMT_VINFO_STMT (info));
f8c0baaf
RB
9829 if (v == stmt_vec_info_vec)
9830 stmt_vec_info_vec = NULL;
9831 v->release ();
ebfd146a
IR
9832}
9833
9834
9835/* Free stmt vectorization related info. */
9836
9837void
355fe088 9838free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9839{
9840 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9841
9842 if (!stmt_info)
9843 return;
9844
78048b1c
JJ
9845 /* Check if this statement has a related "pattern stmt"
9846 (introduced by the vectorizer during the pattern recognition
9847 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9848 too. */
9849 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9850 {
e3947d80
RS
9851 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9852 for (gimple_stmt_iterator si = gsi_start (seq);
9853 !gsi_end_p (si); gsi_next (&si))
9854 {
9855 gimple *seq_stmt = gsi_stmt (si);
9856 gimple_set_bb (seq_stmt, NULL);
9857 tree lhs = gimple_get_lhs (seq_stmt);
9858 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9859 release_ssa_name (lhs);
9860 free_stmt_vec_info (seq_stmt);
9861 }
78048b1c
JJ
9862 stmt_vec_info patt_info
9863 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9864 if (patt_info)
9865 {
355fe088 9866 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9867 gimple_set_bb (patt_stmt, NULL);
9868 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9869 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9870 release_ssa_name (lhs);
f0281fde 9871 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9872 }
9873 }
9874
9771b263 9875 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9876 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9877 set_vinfo_for_stmt (stmt, NULL);
9878 free (stmt_info);
9879}
9880
9881
bb67d9c7 9882/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9883
bb67d9c7 9884 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a 9885   by the target.  */
9886
c803b2a9 9887tree
86e36728 9888get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9889{
c7d97b28 9890 tree orig_scalar_type = scalar_type;
3bd8f481 9891 scalar_mode inner_mode;
ef4bddc2 9892 machine_mode simd_mode;
86e36728 9893 poly_uint64 nunits;
ebfd146a 9894  tree vectype;
9895
3bd8f481 9896  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9897 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a 9898    return NULL_TREE;
9899
3bd8f481 9900 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9901
7b7b1813 9902  /* For vector types of elements whose mode precision doesn't
9903     match their type's precision we use an element type of mode
9904     precision.  The vectorization routines will have to make sure
48f2e373 9905     they support the proper result truncation/extension.
9906     We also make sure to build vector types with INTEGER_TYPE
9907     component type only.  */
6d7971b8 9908 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373 9909      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9910 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813 9911    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9912 TYPE_UNSIGNED (scalar_type));
6d7971b8 9913
ccbf5bb4 9914  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9915 When the component mode passes the above test simply use a type
9916 corresponding to that mode. The theory is that any use that
9917 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9918 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9919 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28 9920    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9921
9922 /* We can't build a vector type of elements with alignment bigger than
9923 their size. */
dfc2e2ac 9924 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c 9925    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9926 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9927
dfc2e2ac 9928  /* If we fell back to using the mode, fail if there was
9929     no scalar type for it.  */
9930 if (scalar_type == NULL_TREE)
9931 return NULL_TREE;
9932
bb67d9c7 9933  /* If no size was supplied use the mode the target prefers.  Otherwise
9934 lookup a vector mode of the specified size. */
86e36728 9935 if (known_eq (size, 0U))
bb67d9c7 9936 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728 9937  else if (!multiple_p (size, nbytes, &nunits)
9938 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9939 return NULL_TREE;
4c8fd8ac 9940 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9941 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9942 return NULL_TREE;
ebfd146a 9943
9944  vectype = build_vector_type (scalar_type, nunits);
ebfd146a 9945
9946 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9947 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9948 return NULL_TREE;
ebfd146a 9949
c7d97b28 9950  /* Re-attach the address-space qualifier if we canonicalized the scalar
9951 type. */
9952 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9953 return build_qualified_type
9954 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9955
ebfd146a 9956  return vectype;
9957}
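
/* Illustrative sketch (not part of the vectorizer proper): a typical use of
   the routine above.  The function name below is hypothetical and only shows
   the intended calling convention.  With a 4-byte 'int' and SIZE == 16 the
   result is a 4-element integer vector type (e.g. one with V4SImode), or
   NULL_TREE if the target provides no suitable vector mode.  */
#if 0
static tree
example_vectype_for_int16 (void)
{
  /* Ask for 16-byte vectors of 'int'; SIZE == 0 would instead pick the
     target's preferred SIMD mode.  */
  return get_vectype_for_scalar_type_and_size (integer_type_node, 16);
}
#endif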
9958
86e36728 9959poly_uint64 current_vector_size;
bb67d9c7 9960
9961/* Function get_vectype_for_scalar_type.
9962
9963 Returns the vector type corresponding to SCALAR_TYPE as supported
9964 by the target. */
9965
9966tree
9967get_vectype_for_scalar_type (tree scalar_type)
9968{
9969 tree vectype;
9970 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9971 current_vector_size);
9972 if (vectype
86e36728 9973 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
9974 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9975 return vectype;
9976}
9977
42fd8198 9978/* Function get_mask_type_for_scalar_type.
9979
9980 Returns the mask type corresponding to a result of comparison
9981 of vectors of specified SCALAR_TYPE as supported by target. */
9982
9983tree
9984get_mask_type_for_scalar_type (tree scalar_type)
9985{
9986 tree vectype = get_vectype_for_scalar_type (scalar_type);
9987
9988 if (!vectype)
9989 return NULL;
9990
9991 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9992 current_vector_size);
9993}
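
/* Illustrative sketch (hypothetical helper, not part of the vectorizer): the
   mask type returned above is the vector type used for the boolean result of
   comparing two vectors of SCALAR_TYPE.  For 16-byte vectors of 'int' it has
   the same number of lanes (4); whether it is a dedicated boolean vector or
   an integer vector of the same size depends on the target.  */
#if 0
static tree
example_mask_type_for_int_compare (void)
{
  /* Mask type matching a comparison of two 'int' vectors of the current
     vector size.  */
  return get_mask_type_for_scalar_type (integer_type_node);
}
#endif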
9994
b690cc0f 9995/* Function get_same_sized_vectype
9996
9997 Returns a vector type corresponding to SCALAR_TYPE of size
9998 VECTOR_TYPE if supported by the target. */
9999
10000tree
bb67d9c7 10001get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10002{
2568d8a1 10003 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
10004 return build_same_sized_truth_vector_type (vector_type);
10005
bb67d9c7
RG
10006 return get_vectype_for_scalar_type_and_size
10007 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
10008}
10009
ebfd146a 10010/* Function vect_is_simple_use.
10011
10012   Input:
81c40241 10013   VINFO - the vect info of the loop or basic block that is being vectorized.
10014   OPERAND - operand in the loop or bb.
10015   Output:
894dd753 10016   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME.
81c40241 10017   DT - the type of definition
ebfd146a 10018
10019   Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10020   For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10021   that are defined by the current iteration of the loop.  Unsupportable
b8698a0f 10022   operands are those that are defined by a previous iteration of the loop (as
a70d6342 10023   is the case in reduction/induction computations).
10024   For basic blocks, supportable operands are constants and bb invariants.
10025   For now, operands defined outside the basic block are not supported.  */
ebfd146a 10026
10027bool
894dd753 10028vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10029		    gimple **def_stmt_out)
b8698a0f 10030{
894dd753 10031  if (def_stmt_out)
10032 *def_stmt_out = NULL;
3fc356dc 10033 *dt = vect_unknown_def_type;
b8698a0f 10034
73fbfcad 10035 if (dump_enabled_p ())
ebfd146a 10036 {
78c60e3d
SS
10037 dump_printf_loc (MSG_NOTE, vect_location,
10038 "vect_is_simple_use: operand ");
30f502ed
RB
10039 if (TREE_CODE (operand) == SSA_NAME
10040 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10041 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10042 else
10043 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 10044 }
b8698a0f 10045
b758f602 10046 if (CONSTANT_CLASS_P (operand))
30f502ed
RB
10047 *dt = vect_constant_def;
10048 else if (is_gimple_min_invariant (operand))
10049 *dt = vect_external_def;
10050 else if (TREE_CODE (operand) != SSA_NAME)
10051 *dt = vect_unknown_def_type;
10052 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
8644a673 10053 *dt = vect_external_def;
ebfd146a
IR
10054 else
10055 {
30f502ed
RB
10056 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10057 if (! vect_stmt_in_region_p (vinfo, def_stmt))
10058 *dt = vect_external_def;
10059 else
0f8c840c 10060 {
30f502ed
RB
10061 stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt);
10062 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10063 {
10064 def_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10065 stmt_vinfo = vinfo_for_stmt (def_stmt);
10066 }
10067 switch (gimple_code (def_stmt))
10068 {
10069 case GIMPLE_PHI:
10070 case GIMPLE_ASSIGN:
10071 case GIMPLE_CALL:
10072 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10073 break;
10074 default:
10075 *dt = vect_unknown_def_type;
10076 break;
10077 }
0f8c840c 10078 }
30f502ed
RB
10079 if (def_stmt_out)
10080 *def_stmt_out = def_stmt;
ebfd146a
IR
10081 }
10082
2e8ab70c
RB
10083 if (dump_enabled_p ())
10084 {
30f502ed 10085 dump_printf (MSG_NOTE, ", type of def: ");
2e8ab70c
RB
10086 switch (*dt)
10087 {
10088 case vect_uninitialized_def:
10089 dump_printf (MSG_NOTE, "uninitialized\n");
10090 break;
10091 case vect_constant_def:
10092 dump_printf (MSG_NOTE, "constant\n");
10093 break;
10094 case vect_external_def:
10095 dump_printf (MSG_NOTE, "external\n");
10096 break;
10097 case vect_internal_def:
10098 dump_printf (MSG_NOTE, "internal\n");
10099 break;
10100 case vect_induction_def:
10101 dump_printf (MSG_NOTE, "induction\n");
10102 break;
10103 case vect_reduction_def:
10104 dump_printf (MSG_NOTE, "reduction\n");
10105 break;
10106 case vect_double_reduction_def:
10107 dump_printf (MSG_NOTE, "double reduction\n");
10108 break;
10109 case vect_nested_cycle:
10110 dump_printf (MSG_NOTE, "nested cycle\n");
10111 break;
10112 case vect_unknown_def_type:
10113 dump_printf (MSG_NOTE, "unknown\n");
10114 break;
10115 }
10116 }
10117
81c40241 10118 if (*dt == vect_unknown_def_type)
ebfd146a 10119 {
73fbfcad 10120 if (dump_enabled_p ())
78c60e3d 10121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10122 "Unsupported pattern.\n");
ebfd146a
IR
10123 return false;
10124 }
10125
ebfd146a 10126  return true;
10127}
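
/* Illustrative example (an assumption about typical classifications, not
   taken from this file): for a loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i] * c + 4;

   vect_is_simple_use would classify the operands of the multiply-add as
   follows: the value loaded from b[i] is vect_internal_def (defined by a
   statement in the current iteration), the loop-invariant 'c' is
   vect_external_def, and the literal 4 is vect_constant_def.  */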
10128
81c40241 10129/* Function vect_is_simple_use.
b690cc0f 10130
81c40241 10131 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
10132 type of OPERAND and stores it to *VECTYPE. If the definition of
10133 OPERAND is vect_uninitialized_def, vect_constant_def or
10134 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10135 is responsible to compute the best suited vector type for the
10136 scalar operand. */
10137
10138bool
894dd753
RS
10139vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10140 tree *vectype, gimple **def_stmt_out)
b690cc0f 10141{
894dd753
RS
10142 gimple *def_stmt;
10143 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt))
b690cc0f
RG
10144 return false;
10145
894dd753
RS
10146 if (def_stmt_out)
10147 *def_stmt_out = def_stmt;
10148
b690cc0f
RG
10149 /* Now get a vector type if the def is internal, otherwise supply
10150 NULL_TREE and leave it up to the caller to figure out a proper
10151 type for the use stmt. */
10152 if (*dt == vect_internal_def
10153 || *dt == vect_induction_def
10154 || *dt == vect_reduction_def
10155 || *dt == vect_double_reduction_def
10156 || *dt == vect_nested_cycle)
10157 {
894dd753 10158 stmt_vec_info stmt_info = vinfo_for_stmt (def_stmt);
b690cc0f
RG
10159 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10160 gcc_assert (*vectype != NULL_TREE);
30f502ed
RB
10161 if (dump_enabled_p ())
10162 {
10163 dump_printf_loc (MSG_NOTE, vect_location,
10164 "vect_is_simple_use: vectype ");
10165 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10166 dump_printf (MSG_NOTE, "\n");
10167 }
b690cc0f
RG
10168 }
10169 else if (*dt == vect_uninitialized_def
10170 || *dt == vect_constant_def
10171 || *dt == vect_external_def)
10172 *vectype = NULL_TREE;
10173 else
10174 gcc_unreachable ();
10175
10176 return true;
10177}
10178
ebfd146a
IR
10179
10180/* Function supportable_widening_operation
10181
b8698a0f
L
10182 Check whether an operation represented by the code CODE is a
10183 widening operation that is supported by the target platform in
b690cc0f
RG
10184 vector form (i.e., when operating on arguments of type VECTYPE_IN
10185 producing a result of type VECTYPE_OUT).
b8698a0f 10186
1bda738b
JJ
10187 Widening operations we currently support are NOP (CONVERT), FLOAT,
10188 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10189 are supported by the target platform either directly (via vector
10190 tree-codes), or via target builtins.
ebfd146a
IR
10191
10192 Output:
b8698a0f
L
10193 - CODE1 and CODE2 are codes of vector operations to be used when
10194 vectorizing the operation, if available.
ebfd146a
IR
10195 - MULTI_STEP_CVT determines the number of required intermediate steps in
10196 case of multi-step conversion (like char->short->int - in that case
10197 MULTI_STEP_CVT will be 1).
b8698a0f
L
10198 - INTERM_TYPES contains the intermediate type required to perform the
10199 widening operation (short in the above example). */
ebfd146a
IR
10200
10201bool
355fe088 10202supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10203 tree vectype_out, tree vectype_in,
ebfd146a
IR
10204 enum tree_code *code1, enum tree_code *code2,
10205 int *multi_step_cvt,
9771b263 10206 vec<tree> *interm_types)
ebfd146a
IR
10207{
10208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10209 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10210 struct loop *vect_loop = NULL;
ef4bddc2 10211 machine_mode vec_mode;
81f40b79 10212 enum insn_code icode1, icode2;
ebfd146a 10213 optab optab1, optab2;
b690cc0f
RG
10214 tree vectype = vectype_in;
10215 tree wide_vectype = vectype_out;
ebfd146a 10216 enum tree_code c1, c2;
4a00c761
JJ
10217 int i;
10218 tree prev_type, intermediate_type;
ef4bddc2 10219 machine_mode intermediate_mode, prev_mode;
4a00c761 10220 optab optab3, optab4;
ebfd146a 10221
4a00c761 10222 *multi_step_cvt = 0;
4ef69dfc
IR
10223 if (loop_info)
10224 vect_loop = LOOP_VINFO_LOOP (loop_info);
10225
ebfd146a
IR
10226 switch (code)
10227 {
10228 case WIDEN_MULT_EXPR:
6ae6116f
RH
10229 /* The result of a vectorized widening operation usually requires
10230 two vectors (because the widened results do not fit into one vector).
10231 The generated vector results would normally be expected to be
10232 generated in the same order as in the original scalar computation,
10233 i.e. if 8 results are generated in each vector iteration, they are
10234 to be organized as follows:
10235 vect1: [res1,res2,res3,res4],
10236 vect2: [res5,res6,res7,res8].
10237
10238 However, in the special case that the result of the widening
10239 operation is used in a reduction computation only, the order doesn't
10240 matter (because when vectorizing a reduction we change the order of
10241 the computation). Some targets can take advantage of this and
10242 generate more efficient code. For example, targets like Altivec,
10243 that support widen_mult using a sequence of {mult_even,mult_odd}
10244 generate the following vectors:
10245 vect1: [res1,res3,res5,res7],
10246 vect2: [res2,res4,res6,res8].
10247
10248 When vectorizing outer-loops, we execute the inner-loop sequentially
10249 (each vectorized inner-loop iteration contributes to VF outer-loop
10250 iterations in parallel). We therefore don't allow to change the
10251 order of the computation in the inner-loop during outer-loop
10252 vectorization. */
10253 /* TODO: Another case in which order doesn't *really* matter is when we
10254 widen and then contract again, e.g. (short)((int)x * y >> 8).
10255 Normally, pack_trunc performs an even/odd permute, whereas the
10256 repack from an even/odd expansion would be an interleave, which
10257 would be significantly simpler for e.g. AVX2. */
10258 /* In any case, in order to avoid duplicating the code below, recurse
10259 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10260 are properly set up for the caller. If we fail, we'll continue with
10261 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10262 if (vect_loop
10263 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10264 && !nested_in_vect_loop_p (vect_loop, stmt)
10265 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10266 stmt, vectype_out, vectype_in,
a86ec597
RH
10267 code1, code2, multi_step_cvt,
10268 interm_types))
ebc047a2
CH
10269 {
10270 /* Elements in a vector with vect_used_by_reduction property cannot
10271 be reordered if the use chain with this property does not have the
10272 same operation. One such an example is s += a * b, where elements
10273 in a and b cannot be reordered. Here we check if the vector defined
10274 by STMT is only directly used in the reduction statement. */
10275 tree lhs = gimple_assign_lhs (stmt);
10276 use_operand_p dummy;
355fe088 10277 gimple *use_stmt;
ebc047a2
CH
10278 stmt_vec_info use_stmt_info = NULL;
10279 if (single_imm_use (lhs, &dummy, &use_stmt)
10280 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10281 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10282 return true;
10283 }
4a00c761
JJ
10284 c1 = VEC_WIDEN_MULT_LO_EXPR;
10285 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
10286 break;
10287
81c40241
RB
10288 case DOT_PROD_EXPR:
10289 c1 = DOT_PROD_EXPR;
10290 c2 = DOT_PROD_EXPR;
10291 break;
10292
10293 case SAD_EXPR:
10294 c1 = SAD_EXPR;
10295 c2 = SAD_EXPR;
10296 break;
10297
6ae6116f
RH
10298 case VEC_WIDEN_MULT_EVEN_EXPR:
10299 /* Support the recursion induced just above. */
10300 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10301 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10302 break;
10303
36ba4aae 10304 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
10305 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10306 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
10307 break;
10308
ebfd146a 10309 CASE_CONVERT:
4a00c761
JJ
10310 c1 = VEC_UNPACK_LO_EXPR;
10311 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
10312 break;
10313
10314 case FLOAT_EXPR:
4a00c761
JJ
10315 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10316 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
10317 break;
10318
10319 case FIX_TRUNC_EXPR:
1bda738b
JJ
10320 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10321 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10322 break;
ebfd146a
IR
10323
10324 default:
10325 gcc_unreachable ();
10326 }
10327
6ae6116f 10328 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10329 std::swap (c1, c2);
4a00c761 10330
ebfd146a
IR
10331 if (code == FIX_TRUNC_EXPR)
10332 {
10333 /* The signedness is determined from output operand. */
b690cc0f
RG
10334 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10335 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
10336 }
10337 else
10338 {
10339 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10340 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10341 }
10342
10343 if (!optab1 || !optab2)
10344 return false;
10345
10346 vec_mode = TYPE_MODE (vectype);
947131ba
RS
10347 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10348 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10349 return false;
10350
4a00c761
JJ
10351 *code1 = c1;
10352 *code2 = c2;
10353
10354 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10355 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
10356 /* For scalar masks we may have different boolean
10357 vector types having the same QImode. Thus we
10358 add additional check for elements number. */
10359 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10360 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10361 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10362
b8698a0f 10363 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10364 types. */
ebfd146a 10365
4a00c761
JJ
10366 prev_type = vectype;
10367 prev_mode = vec_mode;
b8698a0f 10368
4a00c761
JJ
10369 if (!CONVERT_EXPR_CODE_P (code))
10370 return false;
b8698a0f 10371
4a00c761
JJ
10372 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10373 intermediate steps in promotion sequence. We try
10374 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10375 not. */
9771b263 10376 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10377 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10378 {
10379 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10380 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10381 {
7cfb4d93 10382 intermediate_type = vect_halve_mask_nunits (prev_type);
3ae0661a
IE
10383 if (intermediate_mode != TYPE_MODE (intermediate_type))
10384 return false;
10385 }
10386 else
10387 intermediate_type
10388 = lang_hooks.types.type_for_mode (intermediate_mode,
10389 TYPE_UNSIGNED (prev_type));
10390
4a00c761
JJ
10391 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10392 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10393
10394 if (!optab3 || !optab4
10395 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10396 || insn_data[icode1].operand[0].mode != intermediate_mode
10397 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10398 || insn_data[icode2].operand[0].mode != intermediate_mode
10399 || ((icode1 = optab_handler (optab3, intermediate_mode))
10400 == CODE_FOR_nothing)
10401 || ((icode2 = optab_handler (optab4, intermediate_mode))
10402 == CODE_FOR_nothing))
10403 break;
ebfd146a 10404
9771b263 10405 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10406 (*multi_step_cvt)++;
10407
10408 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10409 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10410 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10411 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10412 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
10413
10414 prev_type = intermediate_type;
10415 prev_mode = intermediate_mode;
ebfd146a
IR
10416 }
10417
9771b263 10418 interm_types->release ();
4a00c761 10419 return false;
ebfd146a
IR
10420}
10421
10422
10423/* Function supportable_narrowing_operation
10424
b8698a0f 10425   Check whether an operation represented by the code CODE is a
10426   narrowing operation that is supported by the target platform in
b690cc0f 10427   vector form (i.e., when operating on arguments of type VECTYPE_IN
10428   and producing a result of type VECTYPE_OUT).
b8698a0f 10429
1bda738b 10430   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10431   and FLOAT.  This function checks if these operations are supported by
ebfd146a 10432   the target platform directly via vector tree-codes.
10433
10434   Output:
b8698a0f 10435   - CODE1 is the code of a vector operation to be used when
10436     vectorizing the operation, if available.
ebfd146a 10437   - MULTI_STEP_CVT determines the number of required intermediate steps in
10438     case of multi-step conversion (like int->short->char - in that case
10439     MULTI_STEP_CVT will be 1).
10440   - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10441     narrowing operation (short in the above example).  */
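
/* Illustrative, self-contained sketch (not used by the compiler): the
   element-wise effect of an int -> char narrowing done in two steps,
   mirroring the int->short->char example above.  Each VEC_PACK_TRUNC_EXPR
   step combines two input vectors into one narrower vector, so
   MULTI_STEP_CVT would be 1 and INTERM_TYPES would hold the short vector
   type.  */
#if 0
static void
example_two_step_narrow (const int in[8], signed char out[8])
{
  short mid[8];
  int i;

  /* Step 1: ints -> shorts, packing pairs of int vectors.  */
  for (i = 0; i < 8; i++)
    mid[i] = (short) in[i];

  /* Step 2: shorts -> chars, packing again.  */
  for (i = 0; i < 8; i++)
    out[i] = (signed char) mid[i];
}
#endif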
ebfd146a
IR
10442
10443bool
10444supportable_narrowing_operation (enum tree_code code,
b690cc0f 10445 tree vectype_out, tree vectype_in,
ebfd146a 10446 enum tree_code *code1, int *multi_step_cvt,
9771b263 10447 vec<tree> *interm_types)
ebfd146a 10448{
ef4bddc2 10449 machine_mode vec_mode;
ebfd146a
IR
10450 enum insn_code icode1;
10451 optab optab1, interm_optab;
b690cc0f
RG
10452 tree vectype = vectype_in;
10453 tree narrow_vectype = vectype_out;
ebfd146a 10454 enum tree_code c1;
3ae0661a 10455 tree intermediate_type, prev_type;
ef4bddc2 10456 machine_mode intermediate_mode, prev_mode;
ebfd146a 10457 int i;
4a00c761 10458 bool uns;
ebfd146a 10459
4a00c761 10460 *multi_step_cvt = 0;
ebfd146a
IR
10461 switch (code)
10462 {
10463 CASE_CONVERT:
10464 c1 = VEC_PACK_TRUNC_EXPR;
10465 break;
10466
10467 case FIX_TRUNC_EXPR:
10468 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10469 break;
10470
10471 case FLOAT_EXPR:
1bda738b
JJ
10472 c1 = VEC_PACK_FLOAT_EXPR;
10473 break;
ebfd146a
IR
10474
10475 default:
10476 gcc_unreachable ();
10477 }
10478
10479 if (code == FIX_TRUNC_EXPR)
10480 /* The signedness is determined from output operand. */
b690cc0f 10481 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
10482 else
10483 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10484
10485 if (!optab1)
10486 return false;
10487
10488 vec_mode = TYPE_MODE (vectype);
947131ba 10489 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10490 return false;
10491
4a00c761
JJ
10492 *code1 = c1;
10493
10494 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
10495 /* For scalar masks we may have different boolean
10496 vector types having the same QImode. Thus we
10497 add additional check for elements number. */
10498 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10499 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10500 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10501
1bda738b
JJ
10502 if (code == FLOAT_EXPR)
10503 return false;
10504
ebfd146a
IR
10505 /* Check if it's a multi-step conversion that can be done using intermediate
10506 types. */
4a00c761 10507 prev_mode = vec_mode;
3ae0661a 10508 prev_type = vectype;
4a00c761
JJ
10509 if (code == FIX_TRUNC_EXPR)
10510 uns = TYPE_UNSIGNED (vectype_out);
10511 else
10512 uns = TYPE_UNSIGNED (vectype);
10513
10514 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10515 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10516 costly than signed. */
10517 if (code == FIX_TRUNC_EXPR && uns)
10518 {
10519 enum insn_code icode2;
10520
10521 intermediate_type
10522 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10523 interm_optab
10524 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10525 if (interm_optab != unknown_optab
4a00c761
JJ
10526 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10527 && insn_data[icode1].operand[0].mode
10528 == insn_data[icode2].operand[0].mode)
10529 {
10530 uns = false;
10531 optab1 = interm_optab;
10532 icode1 = icode2;
10533 }
10534 }
ebfd146a 10535
4a00c761
JJ
10536 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10537 intermediate steps in promotion sequence. We try
10538 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10539 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10540 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10541 {
10542 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10543 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10544 {
7cfb4d93 10545 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10546 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10547 return false;
3ae0661a
IE
10548 }
10549 else
10550 intermediate_type
10551 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
10552 interm_optab
10553 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10554 optab_default);
10555 if (!interm_optab
10556 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10557 || insn_data[icode1].operand[0].mode != intermediate_mode
10558 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10559 == CODE_FOR_nothing))
10560 break;
10561
9771b263 10562 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10563 (*multi_step_cvt)++;
10564
10565 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10566 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10567 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10568 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
10569
10570 prev_mode = intermediate_mode;
3ae0661a 10571 prev_type = intermediate_type;
4a00c761 10572 optab1 = interm_optab;
ebfd146a
IR
10573 }
10574
9771b263 10575 interm_types->release ();
4a00c761 10576 return false;
ebfd146a 10577}
7cfb4d93 10578
10579/* Generate and return a statement that sets vector mask MASK such that
10580 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10581
10582gcall *
10583vect_gen_while (tree mask, tree start_index, tree end_index)
10584{
10585 tree cmp_type = TREE_TYPE (start_index);
10586 tree mask_type = TREE_TYPE (mask);
10587 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10588 cmp_type, mask_type,
10589 OPTIMIZE_FOR_SPEED));
10590 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10591 start_index, end_index,
10592 build_zero_cst (mask_type));
10593 gimple_call_set_lhs (call, mask);
10594 return call;
10595}
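
/* Scalar model of the IFN_WHILE_ULT mask built above (illustrative only,
   with a hypothetical helper name): lane I of the mask is set iff
   START_INDEX + I < END_INDEX, which for unsigned indices is equivalent to
   the "for all J <= I" formulation in the comment above.  */
#if 0
static void
example_while_ult (unsigned start_index, unsigned end_index,
		   unsigned nlanes, bool mask[])
{
  for (unsigned i = 0; i < nlanes; i++)
    mask[i] = (start_index + i < end_index);
}
#endif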
535e7c11 10596
10597/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10598 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10599
10600tree
10601vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10602 tree end_index)
10603{
10604 tree tmp = make_ssa_name (mask_type);
10605 gcall *call = vect_gen_while (tmp, start_index, end_index);
10606 gimple_seq_add_stmt (seq, call);
10607 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10608}
1f3cb663 10609
10610/* Try to compute the vector types required to vectorize STMT_INFO,
10611 returning true on success and false if vectorization isn't possible.
10612
10613 On success:
10614
10615 - Set *STMT_VECTYPE_OUT to:
10616 - NULL_TREE if the statement doesn't need to be vectorized;
10617 - boolean_type_node if the statement is a boolean operation whose
10618 vector type can only be determined once all the other vector types
10619 are known; and
10620 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10621
10622 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10623 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10624 statement does not help to determine the overall number of units. */
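
/* Illustrative example (an assumption about a typical outcome, not taken
   from this file): for a statement such as

     int_var = (int) short_var;

   *STMT_VECTYPE_OUT would be the vector type of the result (a vector of
   ints), while *NUNITS_VECTYPE_OUT is derived from the smallest scalar type
   in the statement, so with 16-byte vectors it would be the 8-element short
   vector rather than the 4-element int vector.  */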
10625
10626bool
10627vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10628 tree *stmt_vectype_out,
10629 tree *nunits_vectype_out)
10630{
10631 gimple *stmt = stmt_info->stmt;
10632
10633 *stmt_vectype_out = NULL_TREE;
10634 *nunits_vectype_out = NULL_TREE;
10635
10636 if (gimple_get_lhs (stmt) == NULL_TREE
10637 /* MASK_STORE has no lhs, but is ok. */
10638 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10639 {
10640 if (is_a <gcall *> (stmt))
10641 {
10642 /* Ignore calls with no lhs. These must be calls to
10643 #pragma omp simd functions, and what vectorization factor
10644 it really needs can't be determined until
10645 vectorizable_simd_clone_call. */
10646 if (dump_enabled_p ())
10647 dump_printf_loc (MSG_NOTE, vect_location,
10648 "defer to SIMD clone analysis.\n");
10649 return true;
10650 }
10651
10652 if (dump_enabled_p ())
10653 {
10654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10655 "not vectorized: irregular stmt.");
10656 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10657 }
10658 return false;
10659 }
10660
10661 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10662 {
10663 if (dump_enabled_p ())
10664 {
10665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10666 "not vectorized: vector stmt in loop:");
10667 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10668 }
10669 return false;
10670 }
10671
10672 tree vectype;
10673 tree scalar_type = NULL_TREE;
10674 if (STMT_VINFO_VECTYPE (stmt_info))
10675 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10676 else
10677 {
10678 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10679 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10680 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10681 else
10682 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10683
10684 /* Pure bool ops don't participate in number-of-units computation.
10685 For comparisons use the types being compared. */
10686 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10687 && is_gimple_assign (stmt)
10688 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10689 {
10690 *stmt_vectype_out = boolean_type_node;
10691
10692 tree rhs1 = gimple_assign_rhs1 (stmt);
10693 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10694 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10695 scalar_type = TREE_TYPE (rhs1);
10696 else
10697 {
10698 if (dump_enabled_p ())
10699 dump_printf_loc (MSG_NOTE, vect_location,
10700 "pure bool operation.\n");
10701 return true;
10702 }
10703 }
10704
10705 if (dump_enabled_p ())
10706 {
10707 dump_printf_loc (MSG_NOTE, vect_location,
10708 "get vectype for scalar type: ");
10709 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10710 dump_printf (MSG_NOTE, "\n");
10711 }
10712 vectype = get_vectype_for_scalar_type (scalar_type);
10713 if (!vectype)
10714 {
10715 if (dump_enabled_p ())
10716 {
10717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10718 "not vectorized: unsupported data-type ");
10719 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10720 scalar_type);
10721 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10722 }
10723 return false;
10724 }
10725
10726 if (!*stmt_vectype_out)
10727 *stmt_vectype_out = vectype;
10728
10729 if (dump_enabled_p ())
10730 {
10731 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10732 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10733 dump_printf (MSG_NOTE, "\n");
10734 }
10735 }
10736
10737 /* Don't try to compute scalar types if the stmt produces a boolean
10738 vector; use the existing vector type instead. */
10739 tree nunits_vectype;
10740 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10741 nunits_vectype = vectype;
10742 else
10743 {
10744 /* The number of units is set according to the smallest scalar
10745 type (or the largest vector size, but we only support one
10746 vector size per vectorization). */
10747 if (*stmt_vectype_out != boolean_type_node)
10748 {
10749 HOST_WIDE_INT dummy;
10750 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10751 }
10752 if (dump_enabled_p ())
10753 {
10754 dump_printf_loc (MSG_NOTE, vect_location,
10755 "get vectype for scalar type: ");
10756 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10757 dump_printf (MSG_NOTE, "\n");
10758 }
10759 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10760 }
10761 if (!nunits_vectype)
10762 {
10763 if (dump_enabled_p ())
10764 {
10765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10766 "not vectorized: unsupported data-type ");
10767 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10768 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10769 }
10770 return false;
10771 }
10772
10773 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10774 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10775 {
10776 if (dump_enabled_p ())
10777 {
10778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10779 "not vectorized: different sized vector "
10780 "types in statement, ");
10781 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10782 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10783 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10784 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10785 }
10786 return false;
10787 }
10788
10789 if (dump_enabled_p ())
10790 {
10791 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10792 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10793 dump_printf (MSG_NOTE, "\n");
10794
10795 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10796 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10797 dump_printf (MSG_NOTE, "\n");
10798 }
10799
10800 *nunits_vectype_out = nunits_vectype;
10801 return true;
10802}
10803
10804/* Try to determine the correct vector type for STMT_INFO, which is a
10805 statement that produces a scalar boolean result. Return the vector
10806 type on success, otherwise return NULL_TREE. */
10807
10808tree
10809vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10810{
10811 gimple *stmt = stmt_info->stmt;
10812 tree mask_type = NULL;
10813 tree vectype, scalar_type;
10814
10815 if (is_gimple_assign (stmt)
10816 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10817 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10818 {
10819 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10820 mask_type = get_mask_type_for_scalar_type (scalar_type);
10821
10822 if (!mask_type)
10823 {
10824 if (dump_enabled_p ())
10825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10826 "not vectorized: unsupported mask\n");
10827 return NULL_TREE;
10828 }
10829 }
10830 else
10831 {
10832 tree rhs;
10833 ssa_op_iter iter;
1f3cb663
RS
10834 enum vect_def_type dt;
10835
10836 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10837 {
894dd753 10838 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
1f3cb663
RS
10839 {
10840 if (dump_enabled_p ())
10841 {
10842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10843 "not vectorized: can't compute mask type "
10844 "for statement, ");
10845 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10846 0);
10847 }
10848 return NULL_TREE;
10849 }
10850
10851 /* No vectype probably means external definition.
10852 Allow it in case there is another operand which
10853 allows to determine mask type. */
10854 if (!vectype)
10855 continue;
10856
10857 if (!mask_type)
10858 mask_type = vectype;
10859 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10860 TYPE_VECTOR_SUBPARTS (vectype)))
10861 {
10862 if (dump_enabled_p ())
10863 {
10864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10865 "not vectorized: different sized masks "
10866 "types in statement, ");
10867 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10868 mask_type);
10869 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10870 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10871 vectype);
10872 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10873 }
10874 return NULL_TREE;
10875 }
10876 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10877 != VECTOR_BOOLEAN_TYPE_P (vectype))
10878 {
10879 if (dump_enabled_p ())
10880 {
10881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10882 "not vectorized: mixed mask and "
10883 "nonmask vector types in statement, ");
10884 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10885 mask_type);
10886 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10887 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10888 vectype);
10889 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10890 }
10891 return NULL_TREE;
10892 }
10893 }
10894
10895      /* We may compare a boolean value loaded as a vector of integers.
10896	 Fix mask_type in such a case.  */
10897 if (mask_type
10898 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10899 && gimple_code (stmt) == GIMPLE_ASSIGN
10900 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10901 mask_type = build_same_sized_truth_vector_type (mask_type);
10902 }
10903
10904 /* No mask_type should mean loop invariant predicate.
10905 This is probably a subject for optimization in if-conversion. */
10906 if (!mask_type && dump_enabled_p ())
10907 {
10908 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10909 "not vectorized: can't compute mask type "
10910 "for statement, ");
10911 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10912 }
10913 return mask_type;
10914}