/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */
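/* When BODY_COST_VEC is non-null the cost is only recorded in that vector
   for later processing and the value returned is a local estimate,
   COUNT * builtin_vectorization_cost (KIND, VECTYPE, MISALIGN); otherwise
   the target cost model is informed immediately through add_stmt_cost.  */
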
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */
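/* The emitted code has the form

     VECT_NAME = ARRAY[N];

   where VECT_NAME is a fresh vector SSA name based on SCALAR_DEST.  */
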
static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */
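/* For example, x_1 = y_2 + 3 qualifies when y_2 is defined outside the
   loop (vect_external_def); it does not when y_2 is computed by another
   statement inside the loop.  */
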
bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
302
303static bool
355fe088 304vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
ebfd146a
IR
305 enum vect_relevant *relevant, bool *live_p)
306{
307 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
308 ssa_op_iter op_iter;
309 imm_use_iterator imm_iter;
310 use_operand_p use_p;
311 def_operand_p def_p;
312
8644a673 313 *relevant = vect_unused_in_scope;
ebfd146a
IR
314 *live_p = false;
315
316 /* cond stmt other than loop exit cond. */
b8698a0f
L
317 if (is_ctrl_stmt (stmt)
318 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
319 != loop_exit_ctrl_vec_info_type)
8644a673 320 *relevant = vect_used_in_scope;
ebfd146a
IR
321
322 /* changing memory. */
323 if (gimple_code (stmt) != GIMPLE_PHI)
ac6aeab4
RB
324 if (gimple_vdef (stmt)
325 && !gimple_clobber_p (stmt))
ebfd146a 326 {
73fbfcad 327 if (dump_enabled_p ())
78c60e3d 328 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 329 "vec_stmt_relevant_p: stmt has vdefs.\n");
8644a673 330 *relevant = vect_used_in_scope;
ebfd146a
IR
331 }
332
333 /* uses outside the loop. */
334 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
335 {
336 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
337 {
338 basic_block bb = gimple_bb (USE_STMT (use_p));
339 if (!flow_bb_inside_loop_p (loop, bb))
340 {
73fbfcad 341 if (dump_enabled_p ())
78c60e3d 342 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 343 "vec_stmt_relevant_p: used out of loop.\n");
ebfd146a 344
3157b0c2
AO
345 if (is_gimple_debug (USE_STMT (use_p)))
346 continue;
347
ebfd146a
IR
348 /* We expect all such uses to be in the loop exit phis
349 (because of loop closed form) */
350 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
351 gcc_assert (bb == single_exit (loop)->dest);
352
353 *live_p = true;
354 }
355 }
356 }
357
3a2edf4c
AH
358 if (*live_p && *relevant == vect_unused_in_scope
359 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
b28ead45
AH
360 {
361 if (dump_enabled_p ())
362 dump_printf_loc (MSG_NOTE, vect_location,
363 "vec_stmt_relevant_p: stmt live but not relevant.\n");
364 *relevant = vect_used_only_live;
365 }
366
ebfd146a
IR
367 return (*live_p || *relevant);
368}
369
370
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (stmt, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (stmt, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (stmt, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */
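/* For example, a statement with NCOPIES == 2 and one constant or external
   operand is costed as one scalar_to_vec in the prologue plus two
   vector_stmt copies in the loop body.  */
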
void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  /* Cost the "broadcast" of a scalar operand in to a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
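/* For example, with PWR == 1 a two-step promotion is costed as
   vect_pow2 (1) + vect_pow2 (2) == 6 vec_promote_demote operations,
   while a two-step demotion is vect_pow2 (0) + vect_pow2 (1) == 3.  */
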
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
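      /* E.g. for an interleaving group of size 4 and a single copy this is
	 ceil_log2 (4) * 4 == 8 vec_perm statements.  */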
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
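      /* The permute count mirrors the store case: ceil_log2 (group_size)
	 rounds of group_size permutes per copy.  */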
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
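/* For example, initializing a V4SI vector from the scalar constant 5
   with GSI == NULL emits roughly

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader and returns cst_1 (the SSA name is illustrative).  */
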
tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}

ebfd146a
IR
1472/* Function vect_get_vec_def_for_stmt_copy
1473
ff802fa1 1474 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1475 vectorized stmt to be created (by the caller to this function) is a "copy"
1476 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1477 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1478 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1479 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1480 DT is the type of the vector def VEC_OPRND.
1481
1482 Context:
1483 In case the vectorization factor (VF) is bigger than the number
1484 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1485 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1486 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1487 smallest data-type determines the VF, and as a result, when vectorizing
1488 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1489 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1490 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1491 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1492 which VF=16 and nunits=4, so the number of copies required is 4):
1493
1494 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1495
ebfd146a
IR
1496 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1497 VS1.1: vx.1 = memref1 VS1.2
1498 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1499 VS1.3: vx.3 = memref3
ebfd146a
IR
1500
1501 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1502 VSnew.1: vz1 = vx.1 + ... VSnew.2
1503 VSnew.2: vz2 = vx.2 + ... VSnew.3
1504 VSnew.3: vz3 = vx.3 + ...
1505
1506 The vectorization of S1 is explained in vectorizable_load.
1507 The vectorization of S2:
b8698a0f
L
1508 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1509 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1510 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1511 returns the vector-def 'vx.0'.
1512
b8698a0f
L
1513 To create the remaining copies of the vector-stmt (VSnew.j), this
1514 function is called to get the relevant vector-def for each operand. It is
1515 obtained from the respective VS1.j stmt, which is recorded in the
1516 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1517
1518 For example, to obtain the vector-def 'vx.1' in order to create the
1519 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
 1520 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1521 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1522 and return its def ('vx.1').
1523 Overall, to create the above sequence this function will be called 3 times:
1524 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1525 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1526 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1527
1528tree
1529vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1530{
355fe088 1531 gimple *vec_stmt_for_operand;
1532 stmt_vec_info def_stmt_info;
1533
1534 /* Do nothing; can reuse same def. */
8644a673 1535 if (dt == vect_external_def || dt == vect_constant_def)
1536 return vec_oprnd;
1537
1538 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1539 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1540 gcc_assert (def_stmt_info);
1541 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1542 gcc_assert (vec_stmt_for_operand);
1543 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1544 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1545 else
1546 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1547 return vec_oprnd;
1548}
1549
1550
1551/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1552 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1553
c78e3652 1554void
b8698a0f 1555vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1556 vec<tree> *vec_oprnds0,
1557 vec<tree> *vec_oprnds1)
ebfd146a 1558{
9771b263 1559 tree vec_oprnd = vec_oprnds0->pop ();
1560
1561 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1562 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1563
9771b263 1564 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1565 {
9771b263 1566 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1567 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1568 vec_oprnds1->quick_push (vec_oprnd);
1569 }
1570}
1571
1572
c78e3652 1573/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1574
c78e3652 1575void
355fe088 1576vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1577 vec<tree> *vec_oprnds0,
1578 vec<tree> *vec_oprnds1,
306b0c92 1579 slp_tree slp_node)
1580{
1581 if (slp_node)
1582 {
1583 int nops = (op1 == NULL_TREE) ? 1 : 2;
1584 auto_vec<tree> ops (nops);
1585 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1586
9771b263 1587 ops.quick_push (op0);
d092494c 1588 if (op1)
9771b263 1589 ops.quick_push (op1);
d092494c 1590
306b0c92 1591 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1592
37b5ec8f 1593 *vec_oprnds0 = vec_defs[0];
d092494c 1594 if (op1)
37b5ec8f 1595 *vec_oprnds1 = vec_defs[1];
d092494c 1596 }
1597 else
1598 {
1599 tree vec_oprnd;
1600
9771b263 1601 vec_oprnds0->create (1);
81c40241 1602 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1603 vec_oprnds0->quick_push (vec_oprnd);
1604
1605 if (op1)
1606 {
9771b263 1607 vec_oprnds1->create (1);
81c40241 1608 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1609 vec_oprnds1->quick_push (vec_oprnd);
1610 }
1611 }
1612}
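/* E.g. (illustrative, with made-up SSA names): for a non-SLP statement
   'z_1 = x_2 + y_3' this creates single-element vectors holding the
   vector defs of x_2 and y_3 via vect_get_vec_def_for_operand; for an
   SLP node it instead takes the per-operand vector defs that
   vect_get_slp_defs collected for the whole node.  */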
1613
1614/* Helper function called by vect_finish_replace_stmt and
1615 vect_finish_stmt_generation. Set the location of the new
1616 statement and create a stmt_vec_info for it. */
1617
1618static void
1619vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1620{
1621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1622 vec_info *vinfo = stmt_info->vinfo;
1623
1624 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1625
1626 if (dump_enabled_p ())
1627 {
1628 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1629 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1630 }
1631
1632 gimple_set_location (vec_stmt, gimple_location (stmt));
1633
1634 /* While EH edges will generally prevent vectorization, stmt might
1635 e.g. be in a must-not-throw region. Ensure newly created stmts
1636 that could throw are part of the same region. */
1637 int lp_nr = lookup_stmt_eh_lp (stmt);
1638 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1639 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1640}
1641
1642/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1643 which sets the same scalar result as STMT did. */
1644
1645void
1646vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1647{
1648 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1649
1650 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1651 gsi_replace (&gsi, vec_stmt, false);
1652
1653 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1654}
1655
1656/* Function vect_finish_stmt_generation.
1657
1658 Insert a new stmt. */
1659
1660void
355fe088 1661vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1662 gimple_stmt_iterator *gsi)
1663{
1664 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1665
1666 if (!gsi_end_p (*gsi)
1667 && gimple_has_mem_ops (vec_stmt))
1668 {
355fe088 1669 gimple *at_stmt = gsi_stmt (*gsi);
1670 tree vuse = gimple_vuse (at_stmt);
1671 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1672 {
1673 tree vdef = gimple_vdef (at_stmt);
1674 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1675 /* If we have an SSA vuse and insert a store, update virtual
1676 SSA form to avoid triggering the renamer. Do so only
1677 if we can easily see all uses - which is what almost always
1678 happens with the way vectorized stmts are inserted. */
1679 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1680 && ((is_gimple_assign (vec_stmt)
1681 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1682 || (is_gimple_call (vec_stmt)
1683 && !(gimple_call_flags (vec_stmt)
1684 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1685 {
1686 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1687 gimple_set_vdef (vec_stmt, new_vdef);
1688 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1689 }
1690 }
1691 }
ebfd146a 1692 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
bb6c2b68 1693 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1694}
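/* Illustrative sketch of the virtual operand update above (SSA names
   made up): if the scalar statement at GSI is

     # .MEM_5 = VDEF <.MEM_4>
     a[i_3] = x_1;

   and VEC_STMT is a vector store, VEC_STMT receives vuse .MEM_4 and a
   fresh vdef .MEM_6, and the scalar store's vuse is rewired to .MEM_6,
   keeping virtual SSA form valid without running the renamer.  */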
1695
1696/* We want to vectorize a call to combined function CFN with function
1697 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1698 as the types of all inputs. Check whether this is possible using
1699 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1700
1701static internal_fn
1702vectorizable_internal_function (combined_fn cfn, tree fndecl,
1703 tree vectype_out, tree vectype_in)
ebfd146a 1704{
1705 internal_fn ifn;
1706 if (internal_fn_p (cfn))
1707 ifn = as_internal_fn (cfn);
1708 else
1709 ifn = associated_internal_fn (fndecl);
1710 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1711 {
1712 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1713 if (info.vectorizable)
1714 {
1715 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1716 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1717 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1718 OPTIMIZE_FOR_SPEED))
1719 return ifn;
1720 }
1721 }
1722 return IFN_LAST;
1723}
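/* For example (illustrative): a call to the sqrt built-in with
   VECTYPE_OUT == VECTYPE_IN == V2DF maps to IFN_SQRT, and IFN_SQRT is
   returned if direct_internal_fn_supported_p reports that the target
   can do a V2DF sqrt; otherwise the result is IFN_LAST and the caller
   falls back to looking for a target builtin.  */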
1724
5ce9450f 1725
355fe088 1726static tree permute_vec_elements (tree, tree, tree, gimple *,
1727 gimple_stmt_iterator *);
1728
1729/* Check whether a load or store statement in the loop described by
1730 LOOP_VINFO is possible in a fully-masked loop. This is testing
1731 whether the vectorizer pass has the appropriate support, as well as
1732 whether the target does.
1733
1734 VLS_TYPE says whether the statement is a load or store and VECTYPE
1735 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1736 says how the load or store is going to be implemented and GROUP_SIZE
1737 is the number of load or store statements in the containing group.
1738 If the access is a gather load or scatter store, GS_INFO describes
1739 its arguments.
1740
1741 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1742 supported, otherwise record the required mask types. */
1743
1744static void
1745check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1746 vec_load_store_type vls_type, int group_size,
1747 vect_memory_access_type memory_access_type,
1748 gather_scatter_info *gs_info)
1749{
1750 /* Invariant loads need no special support. */
1751 if (memory_access_type == VMAT_INVARIANT)
1752 return;
1753
1754 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1755 machine_mode vecmode = TYPE_MODE (vectype);
1756 bool is_load = (vls_type == VLS_LOAD);
1757 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1758 {
1759 if (is_load
1760 ? !vect_load_lanes_supported (vectype, group_size, true)
1761 : !vect_store_lanes_supported (vectype, group_size, true))
1762 {
1763 if (dump_enabled_p ())
1764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1765 "can't use a fully-masked loop because the"
1766 " target doesn't have an appropriate masked"
1767 " load/store-lanes instruction.\n");
1768 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1769 return;
1770 }
1771 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1772 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1773 return;
1774 }
1775
1776 if (memory_access_type == VMAT_GATHER_SCATTER)
1777 {
1778 internal_fn ifn = (is_load
1779 ? IFN_MASK_GATHER_LOAD
1780 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1781 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1782 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1783 gs_info->memory_type,
1784 TYPE_SIGN (offset_type),
1785 gs_info->scale))
1786 {
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't use a fully-masked loop because the"
1790 " target doesn't have an appropriate masked"
f307441a 1791 " gather load or scatter store instruction.\n");
1792 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1793 return;
1794 }
1795 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1796 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1797 return;
1798 }
1799
1800 if (memory_access_type != VMAT_CONTIGUOUS
1801 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1802 {
1803 /* Element X of the data must come from iteration i * VF + X of the
1804 scalar loop. We need more work to support other mappings. */
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "can't use a fully-masked loop because an access"
1808 " isn't contiguous.\n");
1809 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1810 return;
1811 }
1812
1813 machine_mode mask_mode;
1814 if (!(targetm.vectorize.get_mask_mode
1815 (GET_MODE_NUNITS (vecmode),
1816 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1817 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1818 {
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1821 "can't use a fully-masked loop because the target"
1822 " doesn't have the appropriate masked load or"
1823 " store.\n");
1824 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1825 return;
1826 }
1827 /* We might load more scalars than we need for permuting SLP loads.
1828 We checked in get_group_load_store_type that the extra elements
1829 don't leak into a new vector. */
1830 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1831 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1832 unsigned int nvectors;
1833 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1834 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1835 else
1836 gcc_unreachable ();
1837}
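/* Illustrative sketch (names invented): a contiguous, unconditional
   V4SI load in a fully-masked loop would be emitted as something like

     vect__3.6 = .MASK_LOAD (vectp.4, 4B, loop_mask_5);

   so the checks above require can_vec_mask_load_store_p for V4SImode
   and, on success, record one V4SI-sized mask per copy with
   vect_record_loop_mask.  */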
1838
1839/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1840 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1841 that needs to be applied to all loads and stores in a vectorized loop.
1842 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1843
1844 MASK_TYPE is the type of both masks. If new statements are needed,
1845 insert them before GSI. */
1846
1847static tree
1848prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1849 gimple_stmt_iterator *gsi)
1850{
1851 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1852 if (!loop_mask)
1853 return vec_mask;
1854
1855 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1856 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1857 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1858 vec_mask, loop_mask);
1859 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1860 return and_res;
1861}
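/* E.g. (illustrative): given a vectorized condition vec_mask_7 and a
   loop mask loop_mask_5 of the same mask type, this emits

     vec_mask_and_8 = vec_mask_7 & loop_mask_5;

   before GSI and returns vec_mask_and_8 for use as the mask operand of
   the masked load or store.  */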
1862
1863/* Determine whether we can use a gather load or scatter store to vectorize
1864 strided load or store STMT by truncating the current offset to a smaller
1865 width. We need to be able to construct an offset vector:
1866
1867 { 0, X, X*2, X*3, ... }
1868
1869 without loss of precision, where X is STMT's DR_STEP.
1870
1871 Return true if this is possible, describing the gather load or scatter
1872 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1873
1874static bool
1875vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1876 bool masked_p,
1877 gather_scatter_info *gs_info)
1878{
1879 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1880 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1881 tree step = DR_STEP (dr);
1882 if (TREE_CODE (step) != INTEGER_CST)
1883 {
1884 /* ??? Perhaps we could use range information here? */
1885 if (dump_enabled_p ())
1886 dump_printf_loc (MSG_NOTE, vect_location,
1887 "cannot truncate variable step.\n");
1888 return false;
1889 }
1890
1891 /* Get the number of bits in an element. */
1892 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1893 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1894 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1895
1896 /* Set COUNT to the upper limit on the number of elements - 1.
1897 Start with the maximum vectorization factor. */
1898 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1899
1900 /* Try lowering COUNT to the number of scalar latch iterations. */
1901 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1902 widest_int max_iters;
1903 if (max_loop_iterations (loop, &max_iters)
1904 && max_iters < count)
1905 count = max_iters.to_shwi ();
1906
1907 /* Try scales of 1 and the element size. */
1908 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1909 bool overflow_p = false;
1910 for (int i = 0; i < 2; ++i)
1911 {
1912 int scale = scales[i];
1913 widest_int factor;
1914 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1915 continue;
1916
1917 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1918 in OFFSET_BITS bits. */
1919 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1920 if (overflow_p)
1921 continue;
1922 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1923 if (wi::min_precision (range, sign) > element_bits)
1924 {
1925 overflow_p = true;
1926 continue;
1927 }
1928
1929 /* See whether the target supports the operation. */
1930 tree memory_type = TREE_TYPE (DR_REF (dr));
1931 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1932 memory_type, element_bits, sign, scale,
1933 &gs_info->ifn, &gs_info->element_type))
1934 continue;
1935
1936 tree offset_type = build_nonstandard_integer_type (element_bits,
1937 sign == UNSIGNED);
1938
1939 gs_info->decl = NULL_TREE;
1940 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1941 but we don't need to store that here. */
1942 gs_info->base = NULL_TREE;
1943 gs_info->offset = fold_convert (offset_type, step);
929b4411 1944 gs_info->offset_dt = vect_constant_def;
1945 gs_info->offset_vectype = NULL_TREE;
1946 gs_info->scale = scale;
1947 gs_info->memory_type = memory_type;
1948 return true;
1949 }
1950
1951 if (overflow_p && dump_enabled_p ())
1952 dump_printf_loc (MSG_NOTE, vect_location,
1953 "truncating gather/scatter offset to %d bits"
1954 " might change its value.\n", element_bits);
1955
1956 return false;
1957}
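/* Worked example (illustrative): with 32-bit elements, DR_STEP = 4 and
   at most 255 scalar latch iterations, COUNT is at most 255; the
   SCALE = 4 attempt gives factor X = 1, the range 255 needs far fewer
   than 32 bits, and the offset vector { 0, 1, 2, ... } can therefore
   use a 32-bit offset type without losing precision.  */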
1958
1959/* Return true if we can use gather/scatter internal functions to
1960 vectorize STMT, which is a grouped or strided load or store.
 1961 MASKED_P is true if the load or store is conditional. When returning
1962 true, fill in GS_INFO with the information required to perform the
1963 operation. */
1964
1965static bool
1966vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 1967 bool masked_p,
ab2fc782
RS
1968 gather_scatter_info *gs_info)
1969{
1970 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1971 || gs_info->decl)
1972 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1973 masked_p, gs_info);
1974
1975 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1976 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1977 tree offset_type = TREE_TYPE (gs_info->offset);
1978 unsigned int offset_bits = TYPE_PRECISION (offset_type);
1979
1980 /* Enforced by vect_check_gather_scatter. */
1981 gcc_assert (element_bits >= offset_bits);
1982
1983 /* If the elements are wider than the offset, convert the offset to the
1984 same width, without changing its sign. */
1985 if (element_bits > offset_bits)
1986 {
1987 bool unsigned_p = TYPE_UNSIGNED (offset_type);
1988 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1989 gs_info->offset = fold_convert (offset_type, gs_info->offset);
1990 }
1991
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_NOTE, vect_location,
1994 "using gather/scatter for strided/grouped access,"
1995 " scale = %d\n", gs_info->scale);
1996
1997 return true;
1998}
1999
2000/* STMT is a non-strided load or store, meaning that it accesses
2001 elements with a known constant step. Return -1 if that step
2002 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2003
2004static int
2005compare_step_with_zero (gimple *stmt)
2006{
2007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2008 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2009 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2010 size_zero_node);
2011}
2012
2013/* If the target supports a permute mask that reverses the elements in
2014 a vector of type VECTYPE, return that mask, otherwise return null. */
2015
2016static tree
2017perm_mask_for_reverse (tree vectype)
2018{
928686b1 2019 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2020
2021 /* The encoding has a single stepped pattern. */
2022 vec_perm_builder sel (nunits, 1, 3);
928686b1 2023 for (int i = 0; i < 3; ++i)
908a1a16 2024 sel.quick_push (nunits - 1 - i);
62da9e14 2025
2026 vec_perm_indices indices (sel, 1, nunits);
2027 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2028 return NULL_TREE;
e3342de4 2029 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2030}
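/* For example (illustrative): for V4SI the three encoded elements are
   { 3, 2, 1 }, extending to the series { 3, 2, 1, 0 }, i.e. the
   permutation that reverses the four elements.  */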
5ce9450f 2031
2032/* STMT is either a masked or unconditional store. Return the value
2033 being stored. */
2034
f307441a 2035tree
2036vect_get_store_rhs (gimple *stmt)
2037{
2038 if (gassign *assign = dyn_cast <gassign *> (stmt))
2039 {
2040 gcc_assert (gimple_assign_single_p (assign));
2041 return gimple_assign_rhs1 (assign);
2042 }
2043 if (gcall *call = dyn_cast <gcall *> (stmt))
2044 {
2045 internal_fn ifn = gimple_call_internal_fn (call);
2046 int index = internal_fn_stored_value_index (ifn);
2047 gcc_assert (index >= 0);
2048 return gimple_call_arg (stmt, index);
2049 }
2050 gcc_unreachable ();
2051}
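/* E.g. (illustrative, made-up names): for the assignment 'a[i_1] = x_2'
   this returns x_2, and for a masked store call such as
   '.MASK_STORE (addr_3, align, mask_4, x_2)' it returns the
   stored-value argument x_2, located via
   internal_fn_stored_value_index.  */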
2052
2053/* A subroutine of get_load_store_type, with a subset of the same
2054 arguments. Handle the case where STMT is part of a grouped load
2055 or store.
2056
2057 For stores, the statements in the group are all consecutive
2058 and there is no gap at the end. For loads, the statements in the
2059 group might not be consecutive; there can be gaps between statements
2060 as well as at the end. */
2061
2062static bool
2063get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2064 bool masked_p, vec_load_store_type vls_type,
2065 vect_memory_access_type *memory_access_type,
2066 gather_scatter_info *gs_info)
2067{
2068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2069 vec_info *vinfo = stmt_info->vinfo;
2070 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2071 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2072 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 2073 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2074 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2075 bool single_element_p = (stmt == first_stmt
2076 && !GROUP_NEXT_ELEMENT (stmt_info));
2077 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 2078 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2079
2080 /* True if the vectorized statements would access beyond the last
2081 statement in the group. */
2082 bool overrun_p = false;
2083
2084 /* True if we can cope with such overrun by peeling for gaps, so that
2085 there is at least one final scalar iteration after the vector loop. */
2086 bool can_overrun_p = (!masked_p
2087 && vls_type == VLS_LOAD
2088 && loop_vinfo
2089 && !loop->inner);
2090
2091 /* There can only be a gap at the end of the group if the stride is
2092 known at compile time. */
2093 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2094
2095 /* Stores can't yet have gaps. */
2096 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2097
2098 if (slp)
2099 {
2100 if (STMT_VINFO_STRIDED_P (stmt_info))
2101 {
2102 /* Try to use consecutive accesses of GROUP_SIZE elements,
2103 separated by the stride, until we have a complete vector.
2104 Fall back to scalar accesses if that isn't possible. */
928686b1 2105 if (multiple_p (nunits, group_size))
2106 *memory_access_type = VMAT_STRIDED_SLP;
2107 else
2108 *memory_access_type = VMAT_ELEMENTWISE;
2109 }
2110 else
2111 {
2112 overrun_p = loop_vinfo && gap != 0;
2113 if (overrun_p && vls_type != VLS_LOAD)
2114 {
2115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2116 "Grouped store with gaps requires"
2117 " non-consecutive accesses\n");
2118 return false;
2119 }
2120 /* An overrun is fine if the trailing elements are smaller
2121 than the alignment boundary B. Every vector access will
2122 be a multiple of B and so we are guaranteed to access a
2123 non-gap element in the same B-sized block. */
f9ef2c76 2124 if (overrun_p
2125 && gap < (vect_known_alignment_in_bytes (first_dr)
2126 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2127 overrun_p = false;
2128 if (overrun_p && !can_overrun_p)
2129 {
2130 if (dump_enabled_p ())
2131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2132 "Peeling for outer loop is not supported\n");
2133 return false;
2134 }
2135 *memory_access_type = VMAT_CONTIGUOUS;
2136 }
2137 }
2138 else
2139 {
2140 /* We can always handle this case using elementwise accesses,
2141 but see if something more efficient is available. */
2142 *memory_access_type = VMAT_ELEMENTWISE;
2143
2144 /* If there is a gap at the end of the group then these optimizations
2145 would access excess elements in the last iteration. */
2146 bool would_overrun_p = (gap != 0);
2147 /* An overrun is fine if the trailing elements are smaller than the
2148 alignment boundary B. Every vector access will be a multiple of B
2149 and so we are guaranteed to access a non-gap element in the
2150 same B-sized block. */
f9ef2c76 2151 if (would_overrun_p
7e11fc7f 2152 && !masked_p
2153 && gap < (vect_known_alignment_in_bytes (first_dr)
2154 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2155 would_overrun_p = false;
f702e7d4 2156
2de001ee 2157 if (!STMT_VINFO_STRIDED_P (stmt_info)
2158 && (can_overrun_p || !would_overrun_p)
2159 && compare_step_with_zero (stmt) > 0)
2de001ee 2160 {
2161 /* First cope with the degenerate case of a single-element
2162 vector. */
2163 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2164 *memory_access_type = VMAT_CONTIGUOUS;
2165
2166 /* Otherwise try using LOAD/STORE_LANES. */
2167 if (*memory_access_type == VMAT_ELEMENTWISE
2168 && (vls_type == VLS_LOAD
2169 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2170 : vect_store_lanes_supported (vectype, group_size,
2171 masked_p)))
2172 {
2173 *memory_access_type = VMAT_LOAD_STORE_LANES;
2174 overrun_p = would_overrun_p;
2175 }
2176
2177 /* If that fails, try using permuting loads. */
2178 if (*memory_access_type == VMAT_ELEMENTWISE
2179 && (vls_type == VLS_LOAD
2180 ? vect_grouped_load_supported (vectype, single_element_p,
2181 group_size)
2182 : vect_grouped_store_supported (vectype, group_size)))
2183 {
2184 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2185 overrun_p = would_overrun_p;
2186 }
2187 }
2188
 2189 /* As a last resort, try using a gather load or scatter store.
2190
2191 ??? Although the code can handle all group sizes correctly,
2192 it probably isn't a win to use separate strided accesses based
2193 on nearby locations. Or, even if it's a win over scalar code,
2194 it might not be a win over vectorizing at a lower VF, if that
2195 allows us to use contiguous accesses. */
2196 if (*memory_access_type == VMAT_ELEMENTWISE
2197 && single_element_p
2198 && loop_vinfo
2199 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2200 masked_p, gs_info))
2201 *memory_access_type = VMAT_GATHER_SCATTER;
2202 }
2203
2204 if (vls_type != VLS_LOAD && first_stmt == stmt)
2205 {
2206 /* STMT is the leader of the group. Check the operands of all the
2207 stmts of the group. */
2208 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2209 while (next_stmt)
2210 {
7e11fc7f 2211 tree op = vect_get_store_rhs (next_stmt);
2212 gimple *def_stmt;
2213 enum vect_def_type dt;
2214 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2215 {
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2218 "use not simple.\n");
2219 return false;
2220 }
2221 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2222 }
2223 }
2224
2225 if (overrun_p)
2226 {
2227 gcc_assert (can_overrun_p);
2228 if (dump_enabled_p ())
2229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2230 "Data access with gaps requires scalar "
2231 "epilogue loop\n");
2232 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2233 }
2234
2235 return true;
2236}
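/* Worked example (illustrative): an SLP group of three consecutive
   SImode loads with GROUP_GAP 1, vectorized with V4SI from a base with
   known 16-byte alignment, overruns by one element; since the gap (1)
   is less than 16 / 4 = 4 the extra element stays inside the aligned
   block, so OVERRUN_P is cleared and VMAT_CONTIGUOUS is chosen without
   peeling for gaps.  */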
2237
2238/* A subroutine of get_load_store_type, with a subset of the same
2239 arguments. Handle the case where STMT is a load or store that
2240 accesses consecutive elements with a negative step. */
2241
2242static vect_memory_access_type
2243get_negative_load_store_type (gimple *stmt, tree vectype,
2244 vec_load_store_type vls_type,
2245 unsigned int ncopies)
2246{
2247 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2248 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2249 dr_alignment_support alignment_support_scheme;
2250
2251 if (ncopies > 1)
2252 {
2253 if (dump_enabled_p ())
2254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2255 "multiple types with negative step.\n");
2256 return VMAT_ELEMENTWISE;
2257 }
2258
2259 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2260 if (alignment_support_scheme != dr_aligned
2261 && alignment_support_scheme != dr_unaligned_supported)
2262 {
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2265 "negative step but alignment required.\n");
2266 return VMAT_ELEMENTWISE;
2267 }
2268
2269 if (vls_type == VLS_STORE_INVARIANT)
2270 {
2271 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_NOTE, vect_location,
2273 "negative step with invariant source;"
2274 " no permute needed.\n");
2275 return VMAT_CONTIGUOUS_DOWN;
2276 }
2277
2278 if (!perm_mask_for_reverse (vectype))
2279 {
2280 if (dump_enabled_p ())
2281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2282 "negative step and reversing not supported.\n");
2283 return VMAT_ELEMENTWISE;
2284 }
2285
2286 return VMAT_CONTIGUOUS_REVERSE;
2287}
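/* E.g. (illustrative): a single-copy V4SI load with DR_STEP = -4 gives
   VMAT_CONTIGUOUS_REVERSE when the alignment is supportable and the
   target can do the { 3, 2, 1, 0 } reverse permutation; otherwise the
   function falls back to VMAT_ELEMENTWISE.  */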
2288
2289/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2290 if there is a memory access type that the vectorized form can use,
2291 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2292 or scatters, fill in GS_INFO accordingly.
2293
2294 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2295 MASKED_P is true if the statement is conditional on a vectorized mask.
2296 VECTYPE is the vector type that the vectorized statements will use.
2297 NCOPIES is the number of vector statements that will be needed. */
2298
2299static bool
7e11fc7f 2300get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2301 vec_load_store_type vls_type, unsigned int ncopies,
2302 vect_memory_access_type *memory_access_type,
2303 gather_scatter_info *gs_info)
2304{
2305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 vec_info *vinfo = stmt_info->vinfo;
2307 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2308 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2309 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2310 {
2311 *memory_access_type = VMAT_GATHER_SCATTER;
2312 gimple *def_stmt;
2313 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2314 gcc_unreachable ();
2315 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2316 &gs_info->offset_dt,
2317 &gs_info->offset_vectype))
2318 {
2319 if (dump_enabled_p ())
2320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321 "%s index use not simple.\n",
2322 vls_type == VLS_LOAD ? "gather" : "scatter");
2323 return false;
2324 }
2325 }
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2327 {
7e11fc7f 2328 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2329 memory_access_type, gs_info))
2330 return false;
2331 }
2332 else if (STMT_VINFO_STRIDED_P (stmt_info))
2333 {
2334 gcc_assert (!slp);
ab2fc782 2335 if (loop_vinfo
2336 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2337 masked_p, gs_info))
2338 *memory_access_type = VMAT_GATHER_SCATTER;
2339 else
2340 *memory_access_type = VMAT_ELEMENTWISE;
2341 }
2342 else
2343 {
2344 int cmp = compare_step_with_zero (stmt);
2345 if (cmp < 0)
2346 *memory_access_type = get_negative_load_store_type
2347 (stmt, vectype, vls_type, ncopies);
2348 else if (cmp == 0)
2349 {
2350 gcc_assert (vls_type == VLS_LOAD);
2351 *memory_access_type = VMAT_INVARIANT;
2352 }
2353 else
2354 *memory_access_type = VMAT_CONTIGUOUS;
2355 }
2de001ee 2356
2357 if ((*memory_access_type == VMAT_ELEMENTWISE
2358 || *memory_access_type == VMAT_STRIDED_SLP)
2359 && !nunits.is_constant ())
2360 {
2361 if (dump_enabled_p ())
2362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2363 "Not using elementwise accesses due to variable "
2364 "vectorization factor.\n");
2365 return false;
2366 }
2367
2368 /* FIXME: At the moment the cost model seems to underestimate the
2369 cost of using elementwise accesses. This check preserves the
2370 traditional behavior until that can be fixed. */
2371 if (*memory_access_type == VMAT_ELEMENTWISE
2372 && !STMT_VINFO_STRIDED_P (stmt_info)
2373 && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2374 && !GROUP_NEXT_ELEMENT (stmt_info)
2375 && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2376 {
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2379 "not falling back to elementwise accesses\n");
2380 return false;
2381 }
2382 return true;
2383}
2384
aaeefd88 2385/* Return true if boolean argument MASK is suitable for vectorizing
2386 conditional load or store STMT. When returning true, store the type
2387 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2388 in *MASK_VECTYPE_OUT. */
2389
2390static bool
2391vect_check_load_store_mask (gimple *stmt, tree mask,
2392 vect_def_type *mask_dt_out,
2393 tree *mask_vectype_out)
2394{
2395 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2396 {
2397 if (dump_enabled_p ())
2398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399 "mask argument is not a boolean.\n");
2400 return false;
2401 }
2402
2403 if (TREE_CODE (mask) != SSA_NAME)
2404 {
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "mask argument is not an SSA name.\n");
2408 return false;
2409 }
2410
2411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2412 gimple *def_stmt;
929b4411 2413 enum vect_def_type mask_dt;
aaeefd88 2414 tree mask_vectype;
929b4411 2415 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2416 &mask_vectype))
2417 {
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 "mask use not simple.\n");
2421 return false;
2422 }
2423
2424 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2425 if (!mask_vectype)
2426 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2427
2428 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2429 {
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "could not find an appropriate vector mask type.\n");
2433 return false;
2434 }
2435
2436 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2437 TYPE_VECTOR_SUBPARTS (vectype)))
2438 {
2439 if (dump_enabled_p ())
2440 {
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2442 "vector mask type ");
2443 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2444 dump_printf (MSG_MISSED_OPTIMIZATION,
2445 " does not match vector data type ");
2446 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2447 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2448 }
2449 return false;
2450 }
2451
929b4411 2452 *mask_dt_out = mask_dt;
2453 *mask_vectype_out = mask_vectype;
2454 return true;
2455}
2456
2457/* Return true if stored value RHS is suitable for vectorizing store
2458 statement STMT. When returning true, store the type of the
2459 definition in *RHS_DT_OUT, the type of the vectorized store value in
2460 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2461
2462static bool
2463vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2464 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2465{
 2466 /* In the case this is a store from a constant, make sure
2467 native_encode_expr can handle it. */
2468 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2469 {
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2472 "cannot encode constant as a byte sequence.\n");
2473 return false;
2474 }
2475
2476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2477 gimple *def_stmt;
929b4411 2478 enum vect_def_type rhs_dt;
3133c3b6 2479 tree rhs_vectype;
929b4411 2480 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2481 &rhs_vectype))
2482 {
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2485 "use not simple.\n");
2486 return false;
2487 }
2488
2489 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2490 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2491 {
2492 if (dump_enabled_p ())
2493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2494 "incompatible vector types.\n");
2495 return false;
2496 }
2497
929b4411 2498 *rhs_dt_out = rhs_dt;
3133c3b6 2499 *rhs_vectype_out = rhs_vectype;
929b4411 2500 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2501 *vls_type_out = VLS_STORE_INVARIANT;
2502 else
2503 *vls_type_out = VLS_STORE;
2504 return true;
2505}
2506
2507/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2508 Note that we support masks with floating-point type, in which case the
2509 floats are interpreted as a bitmask. */
2510
2511static tree
2512vect_build_all_ones_mask (gimple *stmt, tree masktype)
2513{
2514 if (TREE_CODE (masktype) == INTEGER_TYPE)
2515 return build_int_cst (masktype, -1);
2516 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2517 {
2518 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2519 mask = build_vector_from_val (masktype, mask);
2520 return vect_init_vector (stmt, mask, masktype, NULL);
2521 }
2522 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2523 {
2524 REAL_VALUE_TYPE r;
2525 long tmp[6];
2526 for (int j = 0; j < 6; ++j)
2527 tmp[j] = -1;
2528 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2529 tree mask = build_real (TREE_TYPE (masktype), r);
2530 mask = build_vector_from_val (masktype, mask);
2531 return vect_init_vector (stmt, mask, masktype, NULL);
2532 }
2533 gcc_unreachable ();
2534}
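/* E.g. (illustrative): for a V4SI MASKTYPE this builds the constant
   { -1, -1, -1, -1 }; for a floating-point mask type the all-ones bit
   pattern is reinterpreted as a real through real_from_target first.  */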
2535
2536/* Build an all-zero merge value of type VECTYPE while vectorizing
2537 STMT as a gather load. */
2538
2539static tree
2540vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2541{
2542 tree merge;
2543 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2544 merge = build_int_cst (TREE_TYPE (vectype), 0);
2545 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2546 {
2547 REAL_VALUE_TYPE r;
2548 long tmp[6];
2549 for (int j = 0; j < 6; ++j)
2550 tmp[j] = 0;
2551 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2552 merge = build_real (TREE_TYPE (vectype), r);
2553 }
2554 else
2555 gcc_unreachable ();
2556 merge = build_vector_from_val (vectype, merge);
2557 return vect_init_vector (stmt, merge, vectype, NULL);
2558}
2559
2560/* Build a gather load call while vectorizing STMT. Insert new instructions
2561 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2562 operation. If the load is conditional, MASK is the unvectorized
929b4411 2563 condition and MASK_DT is its definition type, otherwise MASK is null. */
2564
2565static void
2566vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2567 gimple **vec_stmt, gather_scatter_info *gs_info,
929b4411 2568 tree mask, vect_def_type mask_dt)
2569{
2570 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2571 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2572 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2573 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2574 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2575 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2576 edge pe = loop_preheader_edge (loop);
2577 enum { NARROW, NONE, WIDEN } modifier;
2578 poly_uint64 gather_off_nunits
2579 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2580
2581 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2582 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2583 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2584 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2585 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2586 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2587 tree scaletype = TREE_VALUE (arglist);
2588 gcc_checking_assert (types_compatible_p (srctype, rettype)
2589 && (!mask || types_compatible_p (srctype, masktype)));
2590
2591 tree perm_mask = NULL_TREE;
2592 tree mask_perm_mask = NULL_TREE;
2593 if (known_eq (nunits, gather_off_nunits))
2594 modifier = NONE;
2595 else if (known_eq (nunits * 2, gather_off_nunits))
2596 {
2597 modifier = WIDEN;
2598
2599 /* Currently widening gathers and scatters are only supported for
2600 fixed-length vectors. */
2601 int count = gather_off_nunits.to_constant ();
2602 vec_perm_builder sel (count, count, 1);
2603 for (int i = 0; i < count; ++i)
2604 sel.quick_push (i | (count / 2));
2605
2606 vec_perm_indices indices (sel, 1, count);
2607 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2608 indices);
2609 }
2610 else if (known_eq (nunits, gather_off_nunits * 2))
2611 {
2612 modifier = NARROW;
2613
2614 /* Currently narrowing gathers and scatters are only supported for
2615 fixed-length vectors. */
2616 int count = nunits.to_constant ();
2617 vec_perm_builder sel (count, count, 1);
2618 sel.quick_grow (count);
2619 for (int i = 0; i < count; ++i)
2620 sel[i] = i < count / 2 ? i : i + count / 2;
2621 vec_perm_indices indices (sel, 2, count);
2622 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2623
2624 ncopies *= 2;
2625
2626 if (mask)
2627 {
2628 for (int i = 0; i < count; ++i)
2629 sel[i] = i | (count / 2);
2630 indices.new_vector (sel, 2, count);
2631 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2632 }
2633 }
2634 else
2635 gcc_unreachable ();
2636
2637 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2638 vectype);
2639
2640 tree ptr = fold_convert (ptrtype, gs_info->base);
2641 if (!is_gimple_min_invariant (ptr))
2642 {
2643 gimple_seq seq;
2644 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2645 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2646 gcc_assert (!new_bb);
2647 }
2648
2649 tree scale = build_int_cst (scaletype, gs_info->scale);
2650
2651 tree vec_oprnd0 = NULL_TREE;
2652 tree vec_mask = NULL_TREE;
2653 tree src_op = NULL_TREE;
2654 tree mask_op = NULL_TREE;
2655 tree prev_res = NULL_TREE;
2656 stmt_vec_info prev_stmt_info = NULL;
2657
2658 if (!mask)
2659 {
2660 src_op = vect_build_zero_merge_argument (stmt, rettype);
2661 mask_op = vect_build_all_ones_mask (stmt, masktype);
2662 }
2663
2664 for (int j = 0; j < ncopies; ++j)
2665 {
2666 tree op, var;
2667 gimple *new_stmt;
2668 if (modifier == WIDEN && (j & 1))
2669 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2670 perm_mask, stmt, gsi);
2671 else if (j == 0)
2672 op = vec_oprnd0
2673 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2674 else
2675 op = vec_oprnd0
2676 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2677
2678 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2679 {
2680 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2681 TYPE_VECTOR_SUBPARTS (idxtype)));
2682 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2683 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2684 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2685 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2686 op = var;
2687 }
2688
2689 if (mask)
2690 {
2691 if (mask_perm_mask && (j & 1))
2692 mask_op = permute_vec_elements (mask_op, mask_op,
2693 mask_perm_mask, stmt, gsi);
2694 else
2695 {
2696 if (j == 0)
2697 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2698 else
929b4411 2699 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2700
2701 mask_op = vec_mask;
2702 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2703 {
2704 gcc_assert
2705 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2706 TYPE_VECTOR_SUBPARTS (masktype)));
2707 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2708 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2709 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2710 mask_op);
2711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2712 mask_op = var;
2713 }
2714 }
2715 src_op = mask_op;
2716 }
2717
2718 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2719 mask_op, scale);
2720
2721 if (!useless_type_conversion_p (vectype, rettype))
2722 {
2723 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2724 TYPE_VECTOR_SUBPARTS (rettype)));
2725 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2726 gimple_call_set_lhs (new_stmt, op);
2727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728 var = make_ssa_name (vec_dest);
2729 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2730 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2731 }
2732 else
2733 {
2734 var = make_ssa_name (vec_dest, new_stmt);
2735 gimple_call_set_lhs (new_stmt, var);
2736 }
2737
2738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2739
2740 if (modifier == NARROW)
2741 {
2742 if ((j & 1) == 0)
2743 {
2744 prev_res = var;
2745 continue;
2746 }
2747 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2748 new_stmt = SSA_NAME_DEF_STMT (var);
2749 }
2750
2751 if (prev_stmt_info == NULL)
2752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2753 else
2754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2755 prev_stmt_info = vinfo_for_stmt (new_stmt);
2756 }
2757}
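/* Modifier example (illustrative): with a V4DF result and a V8SI offset
   vector, NUNITS (4) * 2 == GATHER_OFF_NUNITS (8), so MODIFIER is WIDEN
   and odd-numbered copies first permute the high half of the previous
   offset vector into place before issuing the gather call.  */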
2758
2759/* Prepare the base and offset in GS_INFO for vectorization.
2760 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2761 to the vectorized offset argument for the first copy of STMT. STMT
2762 is the statement described by GS_INFO and LOOP is the containing loop. */
2763
2764static void
2765vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2766 gather_scatter_info *gs_info,
2767 tree *dataref_ptr, tree *vec_offset)
2768{
2769 gimple_seq stmts = NULL;
2770 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2771 if (stmts != NULL)
2772 {
2773 basic_block new_bb;
2774 edge pe = loop_preheader_edge (loop);
2775 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2776 gcc_assert (!new_bb);
2777 }
2778 tree offset_type = TREE_TYPE (gs_info->offset);
2779 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2780 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2781 offset_vectype);
2782}
2783
2784/* Prepare to implement a grouped or strided load or store using
2785 the gather load or scatter store operation described by GS_INFO.
2786 STMT is the load or store statement.
2787
2788 Set *DATAREF_BUMP to the amount that should be added to the base
2789 address after each copy of the vectorized statement. Set *VEC_OFFSET
2790 to an invariant offset vector in which element I has the value
2791 I * DR_STEP / SCALE. */
2792
2793static void
2794vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2795 gather_scatter_info *gs_info,
2796 tree *dataref_bump, tree *vec_offset)
2797{
2798 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2799 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2800 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2801 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2802 gimple_seq stmts;
2803
2804 tree bump = size_binop (MULT_EXPR,
2805 fold_convert (sizetype, DR_STEP (dr)),
2806 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2807 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2808 if (stmts)
2809 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2810
2811 /* The offset given in GS_INFO can have pointer type, so use the element
2812 type of the vector instead. */
2813 tree offset_type = TREE_TYPE (gs_info->offset);
2814 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2815 offset_type = TREE_TYPE (offset_vectype);
2816
2817 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2818 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2819 ssize_int (gs_info->scale));
2820 step = fold_convert (offset_type, step);
2821 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2822
2823 /* Create {0, X, X*2, X*3, ...}. */
2824 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2825 build_zero_cst (offset_type), step);
2826 if (stmts)
2827 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2828}
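/* Worked example (illustrative): for a strided V4SI access with
   DR_STEP = 8 and SCALE = 4, X = 8 / 4 = 2, so *VEC_OFFSET becomes the
   invariant series { 0, 2, 4, 6 } and *DATAREF_BUMP is 8 * 4 = 32
   bytes per vector statement.  */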
2829
2830/* Return the amount that should be added to a vector pointer to move
2831 to the next or previous copy of AGGR_TYPE. DR is the data reference
2832 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2833 vectorization. */
2834
2835static tree
2836vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2837 vect_memory_access_type memory_access_type)
2838{
2839 if (memory_access_type == VMAT_INVARIANT)
2840 return size_zero_node;
2841
2842 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2843 tree step = vect_dr_behavior (dr)->step;
2844 if (tree_int_cst_sgn (step) == -1)
2845 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2846 return iv_step;
2847}
2848
2849/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2850
2851static bool
2852vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2853 gimple **vec_stmt, slp_tree slp_node,
2854 tree vectype_in, enum vect_def_type *dt)
2855{
2856 tree op, vectype;
2857 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2858 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2859 unsigned ncopies;
2860 unsigned HOST_WIDE_INT nunits, num_bytes;
2861
2862 op = gimple_call_arg (stmt, 0);
2863 vectype = STMT_VINFO_VECTYPE (stmt_info);
2864
2865 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2866 return false;
2867
2868 /* Multiple types in SLP are handled by creating the appropriate number of
2869 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2870 case of SLP. */
2871 if (slp_node)
2872 ncopies = 1;
2873 else
e8f142e2 2874 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2875
2876 gcc_assert (ncopies >= 1);
2877
2878 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2879 if (! char_vectype)
2880 return false;
2881
2882 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2883 return false;
2884
794e3180 2885 unsigned word_bytes = num_bytes / nunits;
908a1a16 2886
2887 /* The encoding uses one stepped pattern for each byte in the word. */
2888 vec_perm_builder elts (num_bytes, word_bytes, 3);
2889 for (unsigned i = 0; i < 3; ++i)
37b14185 2890 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2891 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2892
2893 vec_perm_indices indices (elts, 1, num_bytes);
2894 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2895 return false;
2896
2897 if (! vec_stmt)
2898 {
2899 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2900 if (dump_enabled_p ())
2901 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2902 "\n");
78604de0 2903 if (! slp_node)
2904 {
2905 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2906 1, vector_stmt, stmt_info, 0, vect_prologue);
2907 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2908 ncopies, vec_perm, stmt_info, 0, vect_body);
2909 }
2910 return true;
2911 }
2912
736d0f28 2913 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2914
2915 /* Transform. */
2916 vec<tree> vec_oprnds = vNULL;
2917 gimple *new_stmt = NULL;
2918 stmt_vec_info prev_stmt_info = NULL;
2919 for (unsigned j = 0; j < ncopies; j++)
2920 {
2921 /* Handle uses. */
2922 if (j == 0)
306b0c92 2923 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2924 else
2925 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2926
 2927 /* Arguments are ready. Create the new vector stmt. */
2928 unsigned i;
2929 tree vop;
2930 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2931 {
2932 tree tem = make_ssa_name (char_vectype);
2933 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2934 char_vectype, vop));
2935 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2936 tree tem2 = make_ssa_name (char_vectype);
2937 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2938 tem, tem, bswap_vconst);
2939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2940 tem = make_ssa_name (vectype);
2941 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2942 vectype, tem2));
2943 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2944 if (slp_node)
2945 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2946 }
2947
2948 if (slp_node)
2949 continue;
2950
2951 if (j == 0)
2952 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2953 else
2954 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2955
2956 prev_stmt_info = vinfo_for_stmt (new_stmt);
2957 }
2958
2959 vec_oprnds.release ();
2960 return true;
2961}
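/* For example (illustrative): __builtin_bswap32 on V4SI becomes a V16QI
   permutation with the selector

     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }

   i.e. each 4-byte word is byte-reversed in place, wrapped in
   VIEW_CONVERT_EXPRs to and from the char vector type.  */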
2962
2963/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2964 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2965 in a single step. On success, store the binary pack code in
2966 *CONVERT_CODE. */
2967
2968static bool
2969simple_integer_narrowing (tree vectype_out, tree vectype_in,
2970 tree_code *convert_code)
2971{
2972 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2973 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2974 return false;
2975
2976 tree_code code;
2977 int multi_step_cvt = 0;
2978 auto_vec <tree, 8> interm_types;
2979 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2980 &code, &multi_step_cvt,
2981 &interm_types)
2982 || multi_step_cvt)
2983 return false;
2984
2985 *convert_code = code;
2986 return true;
2987}
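/* E.g. (illustrative): narrowing V4SI results to V8HI is a single
   VEC_PACK_TRUNC_EXPR, so *CONVERT_CODE is set to that code and the
   function returns true; a V2DI to V8HI narrowing would need multiple
   steps, so it fails.  */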
5ce9450f 2988
2989/* Function vectorizable_call.
2990
538dd0b7 2991 Check if GS performs a function call that can be vectorized.
b8698a0f 2992 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2993 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2994 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2995
2996static bool
355fe088 2997vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2998 slp_tree slp_node)
ebfd146a 2999{
538dd0b7 3000 gcall *stmt;
3001 tree vec_dest;
3002 tree scalar_dest;
3003 tree op, type;
3004 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3005 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3006 tree vectype_out, vectype_in;
3007 poly_uint64 nunits_in;
3008 poly_uint64 nunits_out;
ebfd146a 3009 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3010 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3011 vec_info *vinfo = stmt_info->vinfo;
81c40241 3012 tree fndecl, new_temp, rhs_type;
355fe088 3013 gimple *def_stmt;
3014 enum vect_def_type dt[3]
3015 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 3016 int ndts = 3;
355fe088 3017 gimple *new_stmt = NULL;
ebfd146a 3018 int ncopies, j;
6e1aa848 3019 vec<tree> vargs = vNULL;
3020 enum { NARROW, NONE, WIDEN } modifier;
3021 size_t i, nargs;
9d5e7640 3022 tree lhs;
ebfd146a 3023
190c2236 3024 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3025 return false;
3026
3027 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3028 && ! vec_stmt)
3029 return false;
3030
3031 /* Is GS a vectorizable call? */
3032 stmt = dyn_cast <gcall *> (gs);
3033 if (!stmt)
3034 return false;
3035
5ce9450f 3036 if (gimple_call_internal_p (stmt)
bfaa08b7 3037 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3038 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3039 /* Handled by vectorizable_load and vectorizable_store. */
3040 return false;
5ce9450f 3041
3042 if (gimple_call_lhs (stmt) == NULL_TREE
3043 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3044 return false;
3045
0136f8f0 3046 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3047
3048 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3049
3050 /* Process function arguments. */
3051 rhs_type = NULL_TREE;
b690cc0f 3052 vectype_in = NULL_TREE;
ebfd146a
IR
3053 nargs = gimple_call_num_args (stmt);
3054
1b1562a5
MM
3055 /* Bail out if the function has more than three arguments, we do not have
3056 interesting builtin functions to vectorize with more than two arguments
3057 except for fma. No arguments is also not good. */
3058 if (nargs == 0 || nargs > 3)
ebfd146a
IR
3059 return false;
3060
74bf76ed
JJ
3061 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3062 if (gimple_call_internal_p (stmt)
3063 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3064 {
3065 nargs = 0;
3066 rhs_type = unsigned_type_node;
3067 }
3068
ebfd146a
IR
3069 for (i = 0; i < nargs; i++)
3070 {
b690cc0f
RG
3071 tree opvectype;
3072
ebfd146a
IR
3073 op = gimple_call_arg (stmt, i);
3074
3075 /* We can only handle calls with arguments of the same type. */
3076 if (rhs_type
8533c9d8 3077 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3078 {
73fbfcad 3079 if (dump_enabled_p ())
78c60e3d 3080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3081 "argument types differ.\n");
ebfd146a
IR
3082 return false;
3083 }
b690cc0f
RG
3084 if (!rhs_type)
3085 rhs_type = TREE_TYPE (op);
ebfd146a 3086
81c40241 3087 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 3088 {
73fbfcad 3089 if (dump_enabled_p ())
78c60e3d 3090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3091 "use not simple.\n");
ebfd146a
IR
3092 return false;
3093 }
ebfd146a 3094
b690cc0f
RG
3095 if (!vectype_in)
3096 vectype_in = opvectype;
3097 else if (opvectype
3098 && opvectype != vectype_in)
3099 {
73fbfcad 3100 if (dump_enabled_p ())
78c60e3d 3101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3102 "argument vector types differ.\n");
b690cc0f
RG
3103 return false;
3104 }
3105 }
3106 /* If all arguments are external or constant defs use a vector type with
3107 the same size as the output vector type. */
ebfd146a 3108 if (!vectype_in)
b690cc0f 3109 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3110 if (vec_stmt)
3111 gcc_assert (vectype_in);
3112 if (!vectype_in)
3113 {
73fbfcad 3114 if (dump_enabled_p ())
7d8930a0 3115 {
78c60e3d
SS
3116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3117 "no vectype for scalar type ");
3118 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3119 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3120 }
3121
3122 return false;
3123 }
ebfd146a
IR
3124
3125 /* FORNOW */
b690cc0f
RG
3126 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3127 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3128 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3129 modifier = NARROW;
c7bda0f4 3130 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3131 modifier = NONE;
c7bda0f4 3132 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3133 modifier = WIDEN;
3134 else
3135 return false;
3136
70439f0d
RS
3137 /* We only handle functions that do not read or clobber memory. */
3138 if (gimple_vuse (stmt))
3139 {
3140 if (dump_enabled_p ())
3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3142 "function reads from or writes to memory.\n");
3143 return false;
3144 }
3145
ebfd146a
IR
3146 /* For now, we only vectorize functions if a target specific builtin
3147 is available. TODO -- in some cases, it might be profitable to
3148 insert the calls for pieces of the vector, in order to be able
3149 to vectorize other operations in the loop. */
70439f0d
RS
3150 fndecl = NULL_TREE;
3151 internal_fn ifn = IFN_LAST;
3152 combined_fn cfn = gimple_call_combined_fn (stmt);
3153 tree callee = gimple_call_fndecl (stmt);
3154
3155 /* First try using an internal function. */
b1b6836e
RS
3156 tree_code convert_code = ERROR_MARK;
3157 if (cfn != CFN_LAST
3158 && (modifier == NONE
3159 || (modifier == NARROW
3160 && simple_integer_narrowing (vectype_out, vectype_in,
3161 &convert_code))))
70439f0d
RS
3162 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3163 vectype_in);
3164
3165 /* If that fails, try asking for a target-specific built-in function. */
3166 if (ifn == IFN_LAST)
3167 {
3168 if (cfn != CFN_LAST)
3169 fndecl = targetm.vectorize.builtin_vectorized_function
3170 (cfn, vectype_out, vectype_in);
7672aa9b 3171 else if (callee)
70439f0d
RS
3172 fndecl = targetm.vectorize.builtin_md_vectorized_function
3173 (callee, vectype_out, vectype_in);
3174 }
3175
3176 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3177 {
70439f0d 3178 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3179 && !slp_node
3180 && loop_vinfo
3181 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3182 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3183 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3184 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3185 {
3186 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3187 { 0, 1, 2, ... vf - 1 } vector. */
3188 gcc_assert (nargs == 0);
3189 }
37b14185
RB
3190 else if (modifier == NONE
3191 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3192 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3193 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3194 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3195 vectype_in, dt);
74bf76ed
JJ
3196 else
3197 {
3198 if (dump_enabled_p ())
3199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3200 "function is not vectorizable.\n");
74bf76ed
JJ
3201 return false;
3202 }
ebfd146a
IR
3203 }
3204
fce57248 3205 if (slp_node)
190c2236 3206 ncopies = 1;
b1b6836e 3207 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3208 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3209 else
e8f142e2 3210 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3211
3212 /* Sanity check: make sure that at least one copy of the vectorized stmt
3213 needs to be generated. */
3214 gcc_assert (ncopies >= 1);
3215
3216 if (!vec_stmt) /* transformation not required. */
3217 {
3218 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 3219 if (dump_enabled_p ())
e645e942
TJ
3220 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3221 "\n");
78604de0
RB
3222 if (!slp_node)
3223 {
3224 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
3225 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3226 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3227 vec_promote_demote, stmt_info, 0, vect_body);
3228 }
b1b6836e 3229
ebfd146a
IR
3230 return true;
3231 }
3232
67b8dbac 3233 /* Transform. */
ebfd146a 3234
73fbfcad 3235 if (dump_enabled_p ())
e645e942 3236 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3237
3238 /* Handle def. */
3239 scalar_dest = gimple_call_lhs (stmt);
3240 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3241
3242 prev_stmt_info = NULL;
b1b6836e 3243 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3244 {
b1b6836e 3245 tree prev_res = NULL_TREE;
ebfd146a
IR
3246 for (j = 0; j < ncopies; ++j)
3247 {
3248 /* Build argument list for the vectorized call. */
3249 if (j == 0)
9771b263 3250 vargs.create (nargs);
ebfd146a 3251 else
9771b263 3252 vargs.truncate (0);
ebfd146a 3253
190c2236
JJ
3254 if (slp_node)
3255 {
ef062b13 3256 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3257 vec<tree> vec_oprnds0;
190c2236
JJ
3258
3259 for (i = 0; i < nargs; i++)
9771b263 3260 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3261 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3262 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3263
3264 /* Arguments are ready. Create the new vector stmt. */
9771b263 3265 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3266 {
3267 size_t k;
3268 for (k = 0; k < nargs; k++)
3269 {
37b5ec8f 3270 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3271 vargs[k] = vec_oprndsk[i];
190c2236 3272 }
b1b6836e
RS
3273 if (modifier == NARROW)
3274 {
3275 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3276 gcall *call
3277 = gimple_build_call_internal_vec (ifn, vargs);
3278 gimple_call_set_lhs (call, half_res);
3279 gimple_call_set_nothrow (call, true);
3280 new_stmt = call;
b1b6836e
RS
3281 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3282 if ((i & 1) == 0)
3283 {
3284 prev_res = half_res;
3285 continue;
3286 }
3287 new_temp = make_ssa_name (vec_dest);
3288 new_stmt = gimple_build_assign (new_temp, convert_code,
3289 prev_res, half_res);
3290 }
70439f0d 3291 else
b1b6836e 3292 {
a844293d 3293 gcall *call;
b1b6836e 3294 if (ifn != IFN_LAST)
a844293d 3295 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3296 else
a844293d
RS
3297 call = gimple_build_call_vec (fndecl, vargs);
3298 new_temp = make_ssa_name (vec_dest, call);
3299 gimple_call_set_lhs (call, new_temp);
3300 gimple_call_set_nothrow (call, true);
3301 new_stmt = call;
b1b6836e 3302 }
190c2236 3303 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3304 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3305 }
3306
3307 for (i = 0; i < nargs; i++)
3308 {
37b5ec8f 3309 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3310 vec_oprndsi.release ();
190c2236 3311 }
190c2236
JJ
3312 continue;
3313 }
3314
ebfd146a
IR
3315 for (i = 0; i < nargs; i++)
3316 {
3317 op = gimple_call_arg (stmt, i);
3318 if (j == 0)
3319 vec_oprnd0
81c40241 3320 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3321 else
63827fb8
IR
3322 {
3323 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3324 vec_oprnd0
3325 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3326 }
ebfd146a 3327
9771b263 3328 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
3329 }
3330
74bf76ed
JJ
3331 if (gimple_call_internal_p (stmt)
3332 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3333 {
c7bda0f4 3334 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3335 tree new_var
0e22bb5a 3336 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3337 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3338 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3339 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3340 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3341 }
b1b6836e
RS
3342 else if (modifier == NARROW)
3343 {
3344 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3345 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3346 gimple_call_set_lhs (call, half_res);
3347 gimple_call_set_nothrow (call, true);
3348 new_stmt = call;
b1b6836e
RS
3349 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3350 if ((j & 1) == 0)
3351 {
3352 prev_res = half_res;
3353 continue;
3354 }
3355 new_temp = make_ssa_name (vec_dest);
3356 new_stmt = gimple_build_assign (new_temp, convert_code,
3357 prev_res, half_res);
3358 }
74bf76ed
JJ
3359 else
3360 {
a844293d 3361 gcall *call;
70439f0d 3362 if (ifn != IFN_LAST)
a844293d 3363 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3364 else
a844293d 3365 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3366 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
3367 gimple_call_set_lhs (call, new_temp);
3368 gimple_call_set_nothrow (call, true);
3369 new_stmt = call;
74bf76ed 3370 }
ebfd146a
IR
3371 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3372
b1b6836e 3373 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3374 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3375 else
3376 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3377
3378 prev_stmt_info = vinfo_for_stmt (new_stmt);
3379 }
b1b6836e
RS
3380 }
3381 else if (modifier == NARROW)
3382 {
ebfd146a
IR
3383 for (j = 0; j < ncopies; ++j)
3384 {
3385 /* Build argument list for the vectorized call. */
3386 if (j == 0)
9771b263 3387 vargs.create (nargs * 2);
ebfd146a 3388 else
9771b263 3389 vargs.truncate (0);
ebfd146a 3390
190c2236
JJ
3391 if (slp_node)
3392 {
ef062b13 3393 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3394 vec<tree> vec_oprnds0;
190c2236
JJ
3395
3396 for (i = 0; i < nargs; i++)
9771b263 3397 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3398 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3399 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3400
3401 /* Arguments are ready. Create the new vector stmt. */
9771b263 3402 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3403 {
3404 size_t k;
9771b263 3405 vargs.truncate (0);
190c2236
JJ
3406 for (k = 0; k < nargs; k++)
3407 {
37b5ec8f 3408 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3409 vargs.quick_push (vec_oprndsk[i]);
3410 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3411 }
a844293d 3412 gcall *call;
70439f0d 3413 if (ifn != IFN_LAST)
a844293d 3414 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3415 else
a844293d
RS
3416 call = gimple_build_call_vec (fndecl, vargs);
3417 new_temp = make_ssa_name (vec_dest, call);
3418 gimple_call_set_lhs (call, new_temp);
3419 gimple_call_set_nothrow (call, true);
3420 new_stmt = call;
190c2236 3421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3422 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3423 }
3424
3425 for (i = 0; i < nargs; i++)
3426 {
37b5ec8f 3427 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3428 vec_oprndsi.release ();
190c2236 3429 }
190c2236
JJ
3430 continue;
3431 }
3432
ebfd146a
IR
3433 for (i = 0; i < nargs; i++)
3434 {
3435 op = gimple_call_arg (stmt, i);
3436 if (j == 0)
3437 {
3438 vec_oprnd0
81c40241 3439 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3440 vec_oprnd1
63827fb8 3441 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3442 }
3443 else
3444 {
336ecb65 3445 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3446 vec_oprnd0
63827fb8 3447 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3448 vec_oprnd1
63827fb8 3449 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3450 }
3451
9771b263
DN
3452 vargs.quick_push (vec_oprnd0);
3453 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3454 }
3455
b1b6836e 3456 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3457 new_temp = make_ssa_name (vec_dest, new_stmt);
3458 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3460
3461 if (j == 0)
3462 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3463 else
3464 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3465
3466 prev_stmt_info = vinfo_for_stmt (new_stmt);
3467 }
3468
3469 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3470 }
b1b6836e
RS
3471 else
3472 /* No current target implements this case. */
3473 return false;
ebfd146a 3474
9771b263 3475 vargs.release ();
ebfd146a 3476
ebfd146a
IR
3477 /* The call in STMT might prevent it from being removed in dce.
3478 We however cannot remove it here, due to the way the ssa name
3479 it defines is mapped to the new definition. So just replace
3480 rhs of the statement with something harmless. */
3481
dd34c087
JJ
3482 if (slp_node)
3483 return true;
3484
ebfd146a 3485 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3486 if (is_pattern_stmt_p (stmt_info))
3487 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3488 else
3489 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3490
9d5e7640 3491 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3492 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3493 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3494 STMT_VINFO_STMT (stmt_info) = new_stmt;
3495 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3496
3497 return true;
3498}
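
/* Illustrative sketch, not taken from the original sources: on a target
   that provides vectorized math routines (e.g. glibc's libmvec on
   x86_64), and assuming -fno-math-errno so the scalar call carries no
   virtual operand, a loop such as

     float a[1024], b[1024];
     for (int i = 0; i < 1024; i++)
       a[i] = sinf (b[i]);

   takes the modifier == NONE path above: each group of four lanes is
   replaced by one call to the target-supplied vector variant, roughly

     vect__1 = _ZGVbN4v_sinf (vect_b);

   with the fndecl obtained from
   targetm.vectorize.builtin_vectorized_function.  */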


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
    }
}
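
/* A sketch of the def chain the walk above recognizes (names assumed):
   for an address like &a[i] passed to a simd clone inside a
   #pragma omp simd loop, the argument is typically defined as

     _1 = GOMP_SIMD_LANE (simduid.0_5(D));
     _2 = _1 * 4;          <-- element size
     _3 = &a + _2;         <-- the POINTER_PLUS_EXPR checked first

   i.e. BASE + GOMP_SIMD_LANE * LINEAR_STEP.  Peeling MULT_EXPR,
   PLUS_EXPR and conversions off the second operand leads back to the
   IFN_GOMP_SIMD_LANE call, at which point BASE and LINEAR_STEP are
   recorded in *ARGINFO.  */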

/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}

/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node)
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
                               &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
          thisarginfo.simd_lane_linear
            = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
               == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
          if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
         linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
          && !thisarginfo.linear_step
          && !vec_stmt
          && thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && loop_vinfo
          && !slp_node
          && TREE_CODE (op) == SSA_NAME)
        vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned HOST_WIDE_INT vf;
  if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not considering SIMD clones; not yet supported"
                         " for variable-width vectors.\n");
      return false;
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen > vf
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen < vf)
          this_badness += (exact_log2 (vf)
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
                         TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }

  if (bestn == NULL)
    return false;

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (simd_clone_subparts (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          return false;
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = vf / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if ((bestn->simdclone->args[i].arg_type
             == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
            || (bestn->simdclone->args[i].arg_type
                == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
            tree sll = arginfo[i].simd_lane_linear
                       ? boolean_true_node : boolean_false_node;
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / simd_clone_subparts (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (simd_clone_subparts (atype)
                      < simd_clone_subparts (arginfo[i].vectype))
                    {
                      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (simd_clone_subparts (arginfo[i].vectype)
                           / simd_clone_subparts (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  bitsize_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      k = (simd_clone_subparts (atype)
                           / simd_clone_subparts (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
              if (j == 0)
                {
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  if (arginfo[i].simd_lane_linear)
                    {
                      vargs.safe_push (arginfo[i].op);
                      break;
                    }
                  tree phi_res = copy_ssa_name (op);
                  gphi *new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               ncopies * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op);
                  new_stmt
                    = gimple_build_assign (phi_arg, code, phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt,
                                                         loop_vinfo));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  widest_int cst
                    = wi::mul (bestn->simdclone->args[i].linear_step,
                               j * nunits);
                  tree tcst = wide_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op));
                  new_stmt = gimple_build_assign (new_temp, code,
                                                  arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
            case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype);
          else if (simd_clone_subparts (vectype)
                   == simd_clone_subparts (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (simd_clone_subparts (vectype) < nunits)
            {
              unsigned int k, l;
              poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
              k = nunits / simd_clone_subparts (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t), l * bytes));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                bitsize_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                vect_clobber_variable (stmt, gsi, new_temp);
              continue;
            }
          else if (simd_clone_subparts (vectype) > nunits)
            {
              unsigned int k = (simd_clone_subparts (vectype)
                                / simd_clone_subparts (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / simd_clone_subparts (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype), tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  vect_clobber_variable (stmt, gsi, new_temp);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              vect_clobber_variable (stmt, gsi, new_temp);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, true);
  unlink_stmt_vdef (stmt);

  return true;
}
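
/* Illustrative sketch, not taken from the original sources: given

     #pragma omp declare simd notinbranch
     int foo (int x);

     for (int i = 0; i < n; i++)
       a[i] = foo (b[i]);

   the scalar call is replaced by a call to the simd clone whose
   simdlen best matches the vectorization factor, e.g. (mangled name
   assumed, x86_64 style)

     vect__1 = _ZGVbN4v_foo (vect_b);

   with the clone chosen by the badness computation above: too-small
   simdlen and inbranch clones are penalized, and argument kinds
   (vector, uniform, linear) and alignment must match.  */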


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple *stmt)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
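
/* Minimal usage sketch (assumed types): widening V8HI operands to V4SI
   results always takes two of these calls, one per half:

     new_stmt1 = vect_gen_widened_results_half (VEC_WIDEN_MULT_LO_EXPR,
                                                NULL_TREE, vop0, vop1,
                                                binary_op, vec_dest, gsi,
                                                stmt);
     new_stmt2 = vect_gen_widened_results_half (VEC_WIDEN_MULT_HI_EXPR,
                                                NULL_TREE, vop0, vop1,
                                                binary_op, vec_dest, gsi,
                                                stmt);

   producing the low and high V4SI halves of the widened product;
   vect_create_vectorized_promotion_stmts below is the caller that
   pairs the halves up.  */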


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
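
/* Sketch of the counting behavior (values assumed): each invocation
   pushes exactly two defs and then recurses, so

     vect_get_loop_based_defs (&oprnd, stmt, dt, &vec_oprnds, 1);

   leaves four vector defs in VEC_OPRNDS: the def for the scalar
   operand plus three successive stmt copies, which is the number of
   source vectors one copy of a two-step narrowing consumes.  */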


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple *stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple *new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
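
/* Sketch of a two-step demotion (target support assumed): narrowing
   four V4SI source vectors to one V16QI result goes through V8HI, so
   the first level emits

     tmp_0 = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>;
     tmp_1 = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>;

   stores TMP_0 and TMP_1 back into VEC_OPRNDS, and the recursive call
   packs them into the final vector:

     res = VEC_PACK_TRUNC_EXPR <tmp_0, tmp_1>;  */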


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple *stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
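
/* Sketch of one promotion level (assumed types): for each V16QI
   operand VOP0 promoted towards V8HI, the loop above emits

     new_tmp1 = [vec_unpack_lo_expr] vop0;
     new_tmp2 = [vec_unpack_hi_expr] vop0;

   and *VEC_OPRNDS0 is replaced by the doubled list of halves, ready
   either for the next conversion step or for the final statements.  */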
4437
4438
b8698a0f
L
4439/* Check if STMT performs a conversion operation, that can be vectorized.
4440 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4441 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4442 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4443
4444static bool
355fe088
TS
4445vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4446 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4447{
4448 tree vec_dest;
4449 tree scalar_dest;
4a00c761 4450 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4451 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4452 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4453 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4454 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4455 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4456 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4457 tree new_temp;
355fe088 4458 gimple *def_stmt;
ebfd146a 4459 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4460 int ndts = 2;
355fe088 4461 gimple *new_stmt = NULL;
ebfd146a 4462 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4463 poly_uint64 nunits_in;
4464 poly_uint64 nunits_out;
ebfd146a 4465 tree vectype_out, vectype_in;
4a00c761
JJ
4466 int ncopies, i, j;
4467 tree lhs_type, rhs_type;
ebfd146a 4468 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4469 vec<tree> vec_oprnds0 = vNULL;
4470 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4471 tree vop0;
4a00c761 4472 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4473 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4474 int multi_step_cvt = 0;
6e1aa848 4475 vec<tree> interm_types = vNULL;
4a00c761
JJ
4476 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4477 int op_type;
4a00c761 4478 unsigned short fltsz;
ebfd146a
IR
4479
4480 /* Is STMT a vectorizable conversion? */
4481
4a00c761 4482 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4483 return false;
4484
66c16fd9
RB
4485 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4486 && ! vec_stmt)
ebfd146a
IR
4487 return false;
4488
4489 if (!is_gimple_assign (stmt))
4490 return false;
4491
4492 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4493 return false;
4494
4495 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4496 if (!CONVERT_EXPR_CODE_P (code)
4497 && code != FIX_TRUNC_EXPR
4498 && code != FLOAT_EXPR
4499 && code != WIDEN_MULT_EXPR
4500 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4501 return false;
4502
4a00c761
JJ
4503 op_type = TREE_CODE_LENGTH (code);
4504
ebfd146a 4505 /* Check types of lhs and rhs. */
b690cc0f 4506 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4507 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4508 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4509
ebfd146a
IR
4510 op0 = gimple_assign_rhs1 (stmt);
4511 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4512
4513 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4514 && !((INTEGRAL_TYPE_P (lhs_type)
4515 && INTEGRAL_TYPE_P (rhs_type))
4516 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4517 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4518 return false;
4519
e6f5c25d
IE
4520 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4521 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4522 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4523 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4524 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4525 {
73fbfcad 4526 if (dump_enabled_p ())
78c60e3d 4527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4528 "type conversion to/from bit-precision unsupported."
4529 "\n");
4a00c761
JJ
4530 return false;
4531 }
4532
b690cc0f 4533 /* Check the operands of the operation. */
81c40241 4534 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4535 {
73fbfcad 4536 if (dump_enabled_p ())
78c60e3d 4537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4538 "use not simple.\n");
b690cc0f
RG
4539 return false;
4540 }
4a00c761
JJ
4541 if (op_type == binary_op)
4542 {
4543 bool ok;
4544
4545 op1 = gimple_assign_rhs2 (stmt);
4546 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4547 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4548 OP1. */
4549 if (CONSTANT_CLASS_P (op0))
81c40241 4550 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4551 else
81c40241 4552 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4553
4554 if (!ok)
4555 {
73fbfcad 4556 if (dump_enabled_p ())
78c60e3d 4557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4558 "use not simple.\n");
4a00c761
JJ
4559 return false;
4560 }
4561 }
4562
b690cc0f
RG
4563 /* If op0 is an external or constant defs use a vector type of
4564 the same size as the output vector type. */
ebfd146a 4565 if (!vectype_in)
b690cc0f 4566 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4567 if (vec_stmt)
4568 gcc_assert (vectype_in);
4569 if (!vectype_in)
4570 {
73fbfcad 4571 if (dump_enabled_p ())
4a00c761 4572 {
78c60e3d
SS
4573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4574 "no vectype for scalar type ");
4575 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4576 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4577 }
7d8930a0
IR
4578
4579 return false;
4580 }
ebfd146a 4581
e6f5c25d
IE
4582 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4583 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4584 {
4585 if (dump_enabled_p ())
4586 {
4587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4588 "can't convert between boolean and non "
4589 "boolean vectors");
4590 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4591 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4592 }
4593
4594 return false;
4595 }
4596
b690cc0f
RG
4597 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4598 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4599 if (known_eq (nunits_out, nunits_in))
ebfd146a 4600 modifier = NONE;
062d5ccc
RS
4601 else if (multiple_p (nunits_out, nunits_in))
4602 modifier = NARROW;
ebfd146a 4603 else
062d5ccc
RS
4604 {
4605 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4606 modifier = WIDEN;
4607 }
ebfd146a 4608
ff802fa1
IR
4609 /* Multiple types in SLP are handled by creating the appropriate number of
4610 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4611 case of SLP. */
fce57248 4612 if (slp_node)
ebfd146a 4613 ncopies = 1;
4a00c761 4614 else if (modifier == NARROW)
e8f142e2 4615 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4616 else
e8f142e2 4617 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4618
ebfd146a
IR
4619 /* Sanity check: make sure that at least one copy of the vectorized stmt
4620 needs to be generated. */
4621 gcc_assert (ncopies >= 1);
4622
  bool found_mode = false;
  scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
  scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
  opt_scalar_mode rhs_mode_iter;

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
	return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
					 &decl1, &code1))
	break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "conversion not supported by target.\n");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
					  &code1, &code2, &multi_step_cvt,
					  &interm_types))
	{
	  /* Binary widening operation can only be supported directly by the
	     architecture.  */
	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
	  break;
	}

      if (code != FLOAT_EXPR
	  || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      fltsz = GET_MODE_SIZE (lhs_mode);
      FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
	{
	  rhs_mode = rhs_mode_iter.require ();
	  if (GET_MODE_SIZE (rhs_mode) > fltsz)
	    break;

	  cvt_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
	  if (cvt_type == NULL_TREE)
	    goto unsupported;

	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
	    {
	      if (!supportable_convert_operation (code, vectype_out,
						  cvt_type, &decl1, &codecvt1))
		goto unsupported;
	    }
	  else if (!supportable_widening_operation (code, stmt, vectype_out,
						    cvt_type, &codecvt1,
						    &codecvt2, &multi_step_cvt,
						    &interm_types))
	    continue;
	  else
	    gcc_assert (multi_step_cvt == 0);

	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
					      vectype_in, &code1, &code2,
					      &multi_step_cvt, &interm_types))
	    {
	      found_mode = true;
	      break;
	    }
	}

      if (!found_mode)
	goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
	codecvt2 = ERROR_MARK;
      else
	{
	  multi_step_cvt++;
	  interm_types.safe_push (cvt_type);
	  cvt_type = NULL_TREE;
	}
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;

      if (code != FIX_TRUNC_EXPR
	  || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
	goto unsupported;

      cvt_type
	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
	goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
					  &decl1, &codecvt1))
	goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
					   &code1, &multi_step_cvt,
					   &interm_types))
	break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

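  /* Editor's worked example (illustrative; the target details are
     assumptions): on a 128-bit SSE2-style target, a short -> double
     FLOAT_EXPR usually has no direct widening support, so the WIDEN case
     above probes wider integer modes.  For SImode it builds
     cvt_type = V4SI, finds that the V4SI -> V2DF widening float
     conversion and the V8HI -> V4SI integer promotion (NOP_EXPR) are
     both supported, and records V4SI as an intermediate type via
     MULTI_STEP_CVT/INTERM_TYPES so the transform phase emits both
     steps.  */
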
  if (!vec_stmt)		/* transformation not required.  */
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_conversion ===\n");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
	  if (!slp_node)
	    vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
	}
      else if (modifier == NARROW)
	{
	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
	  if (!slp_node)
	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      else
	{
	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
	  if (!slp_node)
	    vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
	}
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform conversion. ncopies = %d.\n", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
	op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
	op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  auto_vec<tree> vec_dsts (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
					  (cvt_type && modifier == WIDEN)
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
	   interm_types.iterate (i, &intermediate_type); i--)
	{
	  vec_dest = vect_create_destination_var (scalar_dest,
						  intermediate_type);
	  vec_dsts.quick_push (vec_dest);
	}
    }

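  /* Editor's note (illustrative): VEC_DSTS now holds the destination for
     the final type first, followed by the intermediate types in reverse
     order; the generation loops below index it from MULTI_STEP_CVT down
     to 0, so the first intermediate conversion is emitted first and the
     final-type result last.  */
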
  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    modifier == WIDEN
					    ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
	{
	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
	  if (op_type == binary_op)
	    vec_oprnds1.create (1);
	}
      else if (modifier == NARROW)
	vec_oprnds0.create (
		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
	{
	  if (j == 0)
	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
	  else
	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      if (code1 == CALL_EXPR)
		{
		  new_stmt = gimple_build_call (decl1, 1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_call_set_lhs (new_stmt, new_temp);
		}
	      else
		{
		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		}

	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e. - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (j == 0)
	    {
	      if (slp_node)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    {
		      unsigned int k;

		      vec_oprnd1 = op1;
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);

		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
					 slp_node);
		    }
		  else
		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
				       &vec_oprnds1, slp_node);
		}
	      else
		{
		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
		  vec_oprnds0.quick_push (vec_oprnd0);
		  if (op_type == binary_op)
		    {
		      if (code == WIDEN_LSHIFT_EXPR)
			vec_oprnd1 = op1;
		      else
			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
		      vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }
	  else
	    {
	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
	      vec_oprnds0.truncate (0);
	      vec_oprnds0.quick_push (vec_oprnd0);
	      if (op_type == binary_op)
		{
		  if (code == WIDEN_LSHIFT_EXPR)
		    vec_oprnd1 = op1;
		  else
		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
								 vec_oprnd1);
		  vec_oprnds1.truncate (0);
		  vec_oprnds1.quick_push (vec_oprnd1);
		}
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  for (i = multi_step_cvt; i >= 0; i--)
	    {
	      tree this_dest = vec_dsts[i];
	      enum tree_code c1 = code1, c2 = code2;
	      if (i == 0 && codecvt2 != ERROR_MARK)
		{
		  c1 = codecvt1;
		  c2 = codecvt2;
		}
	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
						      &vec_oprnds1,
						      stmt, this_dest, gsi,
						      c1, c2, decl1, decl2,
						      op_type);
	    }

	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	    {
	      if (cvt_type)
		{
		  if (codecvt1 == CALL_EXPR)
		    {
		      new_stmt = gimple_build_call (decl1, 1, vop0);
		      new_temp = make_ssa_name (vec_dest, new_stmt);
		      gimple_call_set_lhs (new_stmt, new_temp);
		    }
		  else
		    {
		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		      new_temp = make_ssa_name (vec_dest);
		      new_stmt = gimple_build_assign (new_temp, codecvt1,
						      vop0);
		    }

		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		}
	      else
		new_stmt = SSA_NAME_DEF_STMT (vop0);

	      if (slp_node)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	      else
		{
		  if (!prev_stmt_info)
		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
		  prev_stmt_info = vinfo_for_stmt (new_stmt);
		}
	    }
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e. - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
	{
	  /* Handle uses.  */
	  if (slp_node)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	  else
	    {
	      vec_oprnds0.truncate (0);
	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
					vect_pow2 (multi_step_cvt) - 1);
	    }

	  /* Arguments are ready.  Create the new vector stmts.  */
	  if (cvt_type)
	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	      {
		if (codecvt1 == CALL_EXPR)
		  {
		    new_stmt = gimple_build_call (decl1, 1, vop0);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_call_set_lhs (new_stmt, new_temp);
		  }
		else
		  {
		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
		    new_temp = make_ssa_name (vec_dest);
		    new_stmt = gimple_build_assign (new_temp, codecvt1,
						    vop0);
		  }

		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		vec_oprnds0[i] = new_temp;
	      }

	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
						 stmt, vec_dsts, gsi,
						 slp_node, code1,
						 &prev_stmt_info);
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  gimple *def_stmt;
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  int ndts = 1;
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
	  || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
	  || !type_has_mode_precision_p (TREE_TYPE (op)))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
	    > TYPE_PRECISION (TREE_TYPE (op)))
	   && TYPE_UNSIGNED (TREE_TYPE (op)))
      /* Conversion between boolean types of different sizes is
	 a simple assignment in case their vectypes are same
	 boolean vectors.  */
      && (!VECTOR_BOOLEAN_TYPE_P (vectype)
	  || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

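  /* Editor's example (illustrative): converting a 13-bit bitfield value
     to int would need an extra truncation or sign-extension beyond what
     the vector modes provide, so it is rejected here; widening an
     *unsigned* sub-precision value is accepted above because
     zero-extension leaves the low bits unchanged.  */
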
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_assignment ===\n");
      if (!slp_node)
	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  if (CONVERT_EXPR_CODE_P (code)
	      || code == VIEW_CONVERT_EXPR)
	    vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
	  new_stmt = gimple_build_assign (vec_dest, vop);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}

/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}

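/* Editor's usage sketch (hypothetical caller, not part of this file):
   pattern recognition can use a check of this form before synthesizing a
   shift that did not appear in the scalar code, e.g. when rewriting a
   division by a power of two:

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ... build the shift-based replacement pattern ...

   so that an unsupported shift never reaches the transform phase.  */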

/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  gimple *def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector or a scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def)
      && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  gimple *slpstmt;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
	      scalar_shift_arg = false;
	}

      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
      if (dt[1] == vect_internal_def)
	{
	  gimple *def = SSA_NAME_DEF_STMT (op1);
	  if (is_pattern_stmt_p (vinfo_for_stmt (def)))
	    scalar_shift_arg = false;
	}
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }

IR
5405 {
5406 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5407 if (dump_enabled_p ())
78c60e3d 5408 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5409 "vector/vector shift/rotate found.\n");
78c60e3d 5410
aa948027
JJ
5411 if (!op1_vectype)
5412 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5413 if (op1_vectype == NULL_TREE
5414 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5415 {
73fbfcad 5416 if (dump_enabled_p ())
78c60e3d
SS
5417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5418 "unusable type for last operand in"
e645e942 5419 " vector/vector shift/rotate.\n");
cede2577
JJ
5420 return false;
5421 }
9dc3f7de
IR
5422 }
5423 /* See if the machine has a vector shifted by scalar insn and if not
5424 then see if it has a vector shifted by vector insn. */
49eab32e 5425 else
9dc3f7de
IR
5426 {
5427 optab = optab_for_tree_code (code, vectype, optab_scalar);
5428 if (optab
5429 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5430 {
73fbfcad 5431 if (dump_enabled_p ())
78c60e3d 5432 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5433 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5434 }
5435 else
5436 {
5437 optab = optab_for_tree_code (code, vectype, optab_vector);
5438 if (optab
5439 && (optab_handler (optab, TYPE_MODE (vectype))
5440 != CODE_FOR_nothing))
5441 {
49eab32e
JJ
5442 scalar_shift_arg = false;
5443
73fbfcad 5444 if (dump_enabled_p ())
78c60e3d 5445 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5446 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5447
5448 /* Unlike the other binary operators, shifts/rotates have
5449 the rhs being int, instead of the same type as the lhs,
5450 so make sure the scalar is the right type if we are
aa948027 5451 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5452 if (dt[1] == vect_constant_def)
5453 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5454 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5455 TREE_TYPE (op1)))
5456 {
5457 if (slp_node
5458 && TYPE_MODE (TREE_TYPE (vectype))
5459 != TYPE_MODE (TREE_TYPE (op1)))
5460 {
73fbfcad 5461 if (dump_enabled_p ())
78c60e3d
SS
5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5463 "unusable type for last operand in"
e645e942 5464 " vector/vector shift/rotate.\n");
21c0a521 5465 return false;
aa948027
JJ
5466 }
5467 if (vec_stmt && !slp_node)
5468 {
5469 op1 = fold_convert (TREE_TYPE (vectype), op1);
5470 op1 = vect_init_vector (stmt, op1,
5471 TREE_TYPE (vectype), NULL);
5472 }
5473 }
9dc3f7de
IR
5474 }
5475 }
5476 }
9dc3f7de
IR
5477
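  /* Editor's note (illustrative): the conversion just above matters for
     cases like a vector of long long shifted by an int-typed invariant:
     once the shift must be done by a vector/vector insn, the amount has
     to be converted to the element type and broadcast with
     vect_init_vector before it can serve as the second operand.  */
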
  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt
	      && !vect_worthwhile_without_simd_p (vinfo, code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!vec_stmt
      && !VECTOR_MODE_P (TYPE_MODE (vectype))
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_shift ===\n");
      if (!slp_node)
	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (scalar_shift_arg)
	    {
	      /* Vector shl and shr insn patterns can be defined with scalar
		 operand 2 (shift operand).  In this case, use constant or loop
		 invariant op1 directly, without extending it to vector mode
		 first.  */
	      optab_op2_mode = insn_data[icode].operand[2].mode;
	      if (!VECTOR_MODE_P (optab_op2_mode))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "operand 1 using scalar mode.\n");
		  vec_oprnd1 = op1;
		  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
		  vec_oprnds1.quick_push (vec_oprnd1);
		  if (slp_node)
		    {
		      /* Store vec_oprnd1 for every vector stmt to be created
			 for SLP_NODE.  We check during the analysis that all
			 the shift arguments are the same.
			 TODO: Allow different constants for different vector
			 stmts generated for an SLP instance.  */
		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
			vec_oprnds1.quick_push (vec_oprnd1);
		    }
		}
	    }

	  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
	     (a special case for certain kind of vector shifts); otherwise,
	     operand 1 should be of a vector type (the usual case).  */
	  if (vec_oprnd1)
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	  else
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node);
	}
      else
	vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = vec_oprnds1[i];
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  gimple *new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "no vectype for scalar type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
			     TREE_TYPE (op0));
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      target_support_p = (optab_handler (optab, vec_mode)
			  != CODE_FOR_nothing);
    }

  if (!target_support_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* Check only during analysis.  */
      if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
	  || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
	return false;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && !vect_worthwhile_without_simd_p (vinfo, code))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "=== vectorizable_operation ===\n");
      if (!slp_node)
	vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);

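  /* Editor's example (illustrative): a pointer difference such as
       ptrdiff_t d = p - q;   with int *p, *q
     is vectorized as a MINUS_EXPR on vectors of unsigned elements, and
     the result is VIEW_CONVERTed to the signed vectype_out; the unsigned
     subtraction keeps the bit pattern while avoiding bogus signed
     overflow assumptions on the intermediate value.  */
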
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	{
	  if (op_type == binary_op)
	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			       slp_node);
	  else if (op_type == ternary_op)
	    {
	      if (slp_node)
		{
		  auto_vec<tree> ops(3);
		  ops.quick_push (op0);
		  ops.quick_push (op1);
		  ops.quick_push (op2);
		  auto_vec<vec<tree> > vec_defs(3);
		  vect_get_slp_defs (ops, slp_node, &vec_defs);
		  vec_oprnds0 = vec_defs[0];
		  vec_oprnds1 = vec_defs[1];
		  vec_oprnds2 = vec_defs[2];
		}
	      else
		{
		  vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
				     NULL);
		  vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
				     NULL);
		}
	    }
	  else
	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
			       slp_node);
	}
      else
	{
	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
	  if (op_type == ternary_op)
	    {
	      tree vec_oprnd = vec_oprnds2.pop ();
	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
								      vec_oprnd));
	    }
	}

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  vop1 = ((op_type == binary_op || op_type == ternary_op)
		  ? vec_oprnds1[i] : NULL_TREE);
	  vop2 = ((op_type == ternary_op)
		  ? vec_oprnds2[i] : NULL_TREE);
	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  if (vec_cvt_dest)
	    {
	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	      new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
					      new_temp);
	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	    }
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

/* A helper function to ensure data reference DR's base alignment.  */

static void
ensure_base_align (struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}

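/* Editor's note (illustrative): for a file-scope array like
     static int a[1024];
   whose natural alignment is below the target's preferred vector
   alignment, this raises the decl's alignment (e.g. to 16 or 32 bytes)
   so aligned vector accesses can be used without peeling.  */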

/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT.  */

static tree
get_group_alias_ptr_type (gimple *first_stmt)
{
  struct data_reference *first_dr, *next_dr;
  gimple *next_stmt;

  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
  while (next_stmt)
    {
      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}

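/* Editor's example (illustrative): if a store group interleaves accesses
   to an int member and a float member of the same struct, their alias
   sets differ, so the group falls back to ptr_type_node (alias set 0);
   a group that only touches ints keeps the precise int alias pointer
   type, which gives better disambiguation downstream.  */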

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		    slp_tree slp_node)
{
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  gimple *def_stmt;
  enum vect_def_type rhs_dt = vect_unknown_def_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int j;
  gimple *next_stmt, *first_stmt;
  bool grouped_store;
  unsigned int group_size, i;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree aggr_type;
  gather_scatter_info gs_info;
  gimple *new_stmt;
  poly_uint64 vf;
  vec_load_store_type vls_type;
  tree ref_type;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable store?  */

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (is_gimple_assign (stmt))
    {
      tree scalar_dest = gimple_assign_lhs (stmt);
      if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
	  && is_pattern_stmt_p (stmt_info))
	scalar_dest = TREE_OPERAND (scalar_dest, 0);
      if (TREE_CODE (scalar_dest) != ARRAY_REF
	  && TREE_CODE (scalar_dest) != BIT_FIELD_REF
	  && TREE_CODE (scalar_dest) != INDIRECT_REF
	  && TREE_CODE (scalar_dest) != COMPONENT_REF
	  && TREE_CODE (scalar_dest) != IMAGPART_EXPR
	  && TREE_CODE (scalar_dest) != REALPART_EXPR
	  && TREE_CODE (scalar_dest) != MEM_REF)
	return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt);
      if (!call || !gimple_call_internal_p (call))
	return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_store_fn_p (ifn))
	return false;

      if (slp_node != NULL)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "SLP of masked stores not supported.\n");
	  return false;
	}

      int mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0)
	{
	  mask = gimple_call_arg (call, mask_index);
	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
					   &mask_vectype))
	    return false;
	}
    }

  op = vect_get_store_rhs (stmt);

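  /* Editor's note (illustrative): a masked store reaches this point as an
     internal-function call rather than an assignment, e.g.
       MASK_STORE (&a[i], align, mask, value);
     typically produced by if-conversion from a guarded store such as
       if (c[i]) a[i] = value;  */
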
  /* Cannot have hybrid store SLP -- that would mean storing to the
     same location twice.  */
  gcc_assert (slp == PURE_SLP_STMT (stmt_info));

  tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }

  if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
    return false;

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
			    &memory_access_type, &gs_info))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
	{
	  if (!VECTOR_MODE_P (vec_mode)
	      || !can_vec_mask_load_store_p (vec_mode,
					     TYPE_MODE (mask_vectype), false))
	    return false;
	}
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
	       && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported access type for masked store.\n");
	  return false;
	}
    }
  else
    {
      /* FORNOW. In some cases can vectorize even if data-type not supported
	 (e.g. - array initialization with 0).  */
      if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
	return false;
    }

  grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && memory_access_type != VMAT_GATHER_SCATTER
		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
  if (grouped_store)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

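  /* Editor's note (illustrative): a grouped store arises from interleaved
     accesses such as
       a[2*i] = x;  a[2*i+1] = y;
     which form a group of size 2 headed by the first store; the group is
     then written with interleaving permutations rather than lane by
     lane.  */
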
ebfd146a
IR
6257 if (!vec_stmt) /* transformation not required. */
6258 {
2de001ee 6259 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6260
6261 if (loop_vinfo
6262 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6263 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6264 memory_access_type, &gs_info);
7cfb4d93 6265
ebfd146a 6266 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c 6267 /* The SLP costs are calculated during SLP analysis. */
78604de0 6268 if (!slp_node)
9ce4345a
RS
6269 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6270 vls_type, NULL, NULL, NULL);
ebfd146a
IR
6271 return true;
6272 }
2de001ee 6273 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6274
67b8dbac 6275 /* Transform. */
ebfd146a 6276
f702e7d4 6277 ensure_base_align (dr);
c716e67f 6278
f307441a 6279 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6280 {
c3a8f964 6281 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6282 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6283 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6284 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6285 edge pe = loop_preheader_edge (loop);
6286 gimple_seq seq;
6287 basic_block new_bb;
6288 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6289 poly_uint64 scatter_off_nunits
6290 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6291
4d694b27 6292 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6293 modifier = NONE;
4d694b27 6294 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6295 {
3bab6342
AT
6296 modifier = WIDEN;
6297
4d694b27
RS
6298 /* Currently gathers and scatters are only supported for
6299 fixed-length vectors. */
6300 unsigned int count = scatter_off_nunits.to_constant ();
6301 vec_perm_builder sel (count, count, 1);
6302 for (i = 0; i < (unsigned int) count; ++i)
6303 sel.quick_push (i | (count / 2));
3bab6342 6304
4d694b27 6305 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6306 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6307 indices);
3bab6342
AT
6308 gcc_assert (perm_mask != NULL_TREE);
6309 }
4d694b27 6310 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6311 {
3bab6342
AT
6312 modifier = NARROW;
6313
4d694b27
RS
6314 /* Currently gathers and scatters are only supported for
6315 fixed-length vectors. */
6316 unsigned int count = nunits.to_constant ();
6317 vec_perm_builder sel (count, count, 1);
6318 for (i = 0; i < (unsigned int) count; ++i)
6319 sel.quick_push (i | (count / 2));
3bab6342 6320
4d694b27 6321 vec_perm_indices indices (sel, 2, count);
e3342de4 6322 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6323 gcc_assert (perm_mask != NULL_TREE);
6324 ncopies *= 2;
6325 }
6326 else
6327 gcc_unreachable ();
6328
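	  /* A worked example of the selectors built above (illustrative
	     sizes only): for nunits == 4 and scatter_off_nunits == 8
	     (WIDEN), i | (count / 2) yields {4, 5, 6, 7, 4, 5, 6, 7},
	     the permutation that moves the high half of the offset
	     vector into the low lanes for the odd-numbered copies.  In
	     the NARROW case the analogous selector is applied to the
	     rhs vector instead, and NCOPIES is doubled so that each
	     call scatters one half of the data.  */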
134c85ca 6329 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6330 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6331 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6332 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6333 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6334 scaletype = TREE_VALUE (arglist);
6335
6336 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6337 && TREE_CODE (rettype) == VOID_TYPE);
6338
134c85ca 6339 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6340 if (!is_gimple_min_invariant (ptr))
6341 {
6342 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6343 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6344 gcc_assert (!new_bb);
6345 }
6346
6347 /* Currently we support only unconditional scatter stores,
6348 so mask should be all ones. */
6349 mask = build_int_cst (masktype, -1);
6350 mask = vect_init_vector (stmt, mask, masktype, NULL);
6351
134c85ca 6352 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6353
6354 prev_stmt_info = NULL;
6355 for (j = 0; j < ncopies; ++j)
6356 {
6357 if (j == 0)
6358 {
6359 src = vec_oprnd1
c3a8f964 6360 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6361 op = vec_oprnd0
134c85ca 6362 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6363 }
6364 else if (modifier != NONE && (j & 1))
6365 {
6366 if (modifier == WIDEN)
6367 {
6368 src = vec_oprnd1
929b4411 6369 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6370 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6371 stmt, gsi);
6372 }
6373 else if (modifier == NARROW)
6374 {
6375 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6376 stmt, gsi);
6377 op = vec_oprnd0
134c85ca
RS
6378 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6379 vec_oprnd0);
3bab6342
AT
6380 }
6381 else
6382 gcc_unreachable ();
6383 }
6384 else
6385 {
6386 src = vec_oprnd1
929b4411 6387 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6388 op = vec_oprnd0
134c85ca
RS
6389 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6390 vec_oprnd0);
3bab6342
AT
6391 }
6392
6393 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6394 {
928686b1
RS
6395 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6396 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6397 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6398 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6399 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6401 src = var;
6402 }
6403
6404 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6405 {
928686b1
RS
6406 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6407 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6408 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6409 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6410 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6412 op = var;
6413 }
6414
6415 new_stmt
134c85ca 6416 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6417
6418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6419
6420 if (prev_stmt_info == NULL)
6421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6422 else
6423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6424 prev_stmt_info = vinfo_for_stmt (new_stmt);
6425 }
6426 return true;
6427 }
6428
f307441a 6429 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6430 {
f307441a
RS
6431 gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6432 GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6433 }
ebfd146a 6434
f307441a
RS
6435 if (grouped_store)
6436 {
ebfd146a 6437 /* FORNOW */
a70d6342 6438 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6439
6440 /* We vectorize all the stmts of the interleaving group when we
6441 reach the last stmt in the group. */
e14c1050
IR
6442 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6443 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6444 && !slp)
6445 {
6446 *vec_stmt = NULL;
6447 return true;
6448 }
6449
6450 if (slp)
4b5caab7 6451 {
0d0293ac 6452 grouped_store = false;
4b5caab7
IR
6453 /* VEC_NUM is the number of vect stmts to be created for this
6454 group. */
6455 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6456 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 6457 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6458 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6459 op = vect_get_store_rhs (first_stmt);
4b5caab7 6460 }
ebfd146a 6461 else
4b5caab7
IR
6462 /* VEC_NUM is the number of vect stmts to be created for this
6463 group. */
ebfd146a 6464 vec_num = group_size;
44fc7854
BE
6465
6466 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6467 }
b8698a0f 6468 else
7cfb4d93 6469 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6470
73fbfcad 6471 if (dump_enabled_p ())
78c60e3d 6472 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6473 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6474
2de001ee
RS
6475 if (memory_access_type == VMAT_ELEMENTWISE
6476 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6477 {
6478 gimple_stmt_iterator incr_gsi;
6479 bool insert_after;
355fe088 6480 gimple *incr;
f2e2a985
MM
6481 tree offvar;
6482 tree ivstep;
6483 tree running_off;
f2e2a985
MM
6484 tree stride_base, stride_step, alias_off;
6485 tree vec_oprnd;
f502d50e 6486 unsigned int g;
4d694b27
RS
6487 /* Checked by get_load_store_type. */
6488 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6489
7cfb4d93 6490 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6491 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6492
6493 stride_base
6494 = fold_build_pointer_plus
b210f45f 6495 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6496 size_binop (PLUS_EXPR,
b210f45f 6497 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6498 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6499 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6500
6501 /* For a store with loop-invariant (but other than power-of-2)
6502 stride (i.e. not a grouped access) like so:
6503
6504 for (i = 0; i < n; i += stride)
6505 array[i] = ...;
6506
6507 we generate a new induction variable and new stores from
6508 the components of the (vectorized) rhs:
6509
6510 for (j = 0; ; j += VF*stride)
6511 vectemp = ...;
6512 tmp1 = vectemp[0];
6513 array[j] = tmp1;
6514 tmp2 = vectemp[1];
6515 array[j + stride] = tmp2;
6516 ...
6517 */
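      /* E.g. (illustrative values): for a V4SI store with stride 2,
	 assuming VF == 4, the transformed loop above reads

	   for (j = 0; ; j += 4*2)
	     vectemp = ...;
	     array[j] = vectemp[0];
	     array[j + 2] = vectemp[1];
	     array[j + 4] = vectemp[2];
	     array[j + 6] = vectemp[3];  */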
6518
4d694b27 6519 unsigned nstores = const_nunits;
b17dc4d4 6520 unsigned lnel = 1;
cee62fee 6521 tree ltype = elem_type;
04199738 6522 tree lvectype = vectype;
cee62fee
MM
6523 if (slp)
6524 {
4d694b27
RS
6525 if (group_size < const_nunits
6526 && const_nunits % group_size == 0)
b17dc4d4 6527 {
4d694b27 6528 nstores = const_nunits / group_size;
b17dc4d4
RB
6529 lnel = group_size;
6530 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6531 lvectype = vectype;
6532
6533 /* First check if vec_extract optab doesn't support extraction
6534 of vector elts directly. */
b397965c 6535 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6536 machine_mode vmode;
6537 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6538 || !VECTOR_MODE_P (vmode)
414fef4e 6539 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6540 || (convert_optab_handler (vec_extract_optab,
6541 TYPE_MODE (vectype), vmode)
6542 == CODE_FOR_nothing))
6543 {
6544 /* Try to avoid emitting an extract of vector elements
6545 by performing the extracts using an integer type of the
6546 same size, extracting from a vector of those and then
6547 re-interpreting it as the original vector type if
6548 supported. */
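		      /* E.g. (illustrative): when storing groups of 4
			 chars out of a V16QI rhs, LSIZE is 32, so the
			 V16QI is viewed as V4SI and four SImode lanes
			 are extracted, each store then writing one
			 group of four chars at once.  */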
6549 unsigned lsize
6550 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6551 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6552 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6553 /* If we can't construct such a vector fall back to
6554 element extracts from the original vector type and
6555 element size stores. */
4d694b27 6556 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6557 && VECTOR_MODE_P (vmode)
414fef4e 6558 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6559 && (convert_optab_handler (vec_extract_optab,
6560 vmode, elmode)
6561 != CODE_FOR_nothing))
6562 {
4d694b27 6563 nstores = lnunits;
04199738
RB
6564 lnel = group_size;
6565 ltype = build_nonstandard_integer_type (lsize, 1);
6566 lvectype = build_vector_type (ltype, nstores);
6567 }
6568 /* Else fall back to vector extraction anyway.
6569 Fewer stores are more important than avoiding spilling
6570 of the vector we extract from. Compared to the
 6571 construction case in vectorizable_load, no store-forwarding
6572 issue exists here for reasonable archs. */
6573 }
b17dc4d4 6574 }
4d694b27
RS
6575 else if (group_size >= const_nunits
6576 && group_size % const_nunits == 0)
b17dc4d4
RB
6577 {
6578 nstores = 1;
4d694b27 6579 lnel = const_nunits;
b17dc4d4 6580 ltype = vectype;
04199738 6581 lvectype = vectype;
b17dc4d4 6582 }
cee62fee
MM
6583 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6584 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6585 }
6586
f2e2a985
MM
6587 ivstep = stride_step;
6588 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6589 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6590
6591 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6592
b210f45f
RB
6593 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6594 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6595 create_iv (stride_base, ivstep, NULL,
6596 loop, &incr_gsi, insert_after,
6597 &offvar, NULL);
6598 incr = gsi_stmt (incr_gsi);
310213d4 6599 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985 6600
b210f45f 6601 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6602
6603 prev_stmt_info = NULL;
44fc7854 6604 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6605 next_stmt = first_stmt;
6606 for (g = 0; g < group_size; g++)
f2e2a985 6607 {
f502d50e
MM
6608 running_off = offvar;
6609 if (g)
f2e2a985 6610 {
f502d50e
MM
6611 tree size = TYPE_SIZE_UNIT (ltype);
6612 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6613 size);
f502d50e 6614 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6615 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6616 running_off, pos);
f2e2a985 6617 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6618 running_off = newoff;
f502d50e 6619 }
b17dc4d4
RB
6620 unsigned int group_el = 0;
6621 unsigned HOST_WIDE_INT
6622 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6623 for (j = 0; j < ncopies; j++)
6624 {
c3a8f964 6625 /* We've set op and rhs_dt above, from vect_get_store_rhs
f502d50e
MM
 6626 and vect_check_store_rhs, and first_stmt == stmt. */
6627 if (j == 0)
6628 {
6629 if (slp)
6630 {
6631 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6632 slp_node);
f502d50e
MM
6633 vec_oprnd = vec_oprnds[0];
6634 }
6635 else
6636 {
c3a8f964 6637 op = vect_get_store_rhs (next_stmt);
81c40241 6638 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6639 }
6640 }
f2e2a985 6641 else
f502d50e
MM
6642 {
6643 if (slp)
6644 vec_oprnd = vec_oprnds[j];
6645 else
c079cbac 6646 {
929b4411
RS
6647 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6648 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6649 vec_oprnd);
c079cbac 6650 }
f502d50e 6651 }
04199738
RB
6652 /* Pun the vector to extract from if necessary. */
6653 if (lvectype != vectype)
6654 {
6655 tree tem = make_ssa_name (lvectype);
6656 gimple *pun
6657 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6658 lvectype, vec_oprnd));
6659 vect_finish_stmt_generation (stmt, pun, gsi);
6660 vec_oprnd = tem;
6661 }
f502d50e
MM
6662 for (i = 0; i < nstores; i++)
6663 {
6664 tree newref, newoff;
355fe088 6665 gimple *incr, *assign;
f502d50e
MM
6666 tree size = TYPE_SIZE (ltype);
6667 /* Extract the i'th component. */
6668 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6669 bitsize_int (i), size);
6670 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6671 size, pos);
6672
6673 elem = force_gimple_operand_gsi (gsi, elem, true,
6674 NULL_TREE, true,
6675 GSI_SAME_STMT);
6676
b17dc4d4
RB
6677 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6678 group_el * elsz);
f502d50e 6679 newref = build2 (MEM_REF, ltype,
b17dc4d4 6680 running_off, this_off);
19986382 6681 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6682
6683 /* And store it to *running_off. */
6684 assign = gimple_build_assign (newref, elem);
6685 vect_finish_stmt_generation (stmt, assign, gsi);
6686
b17dc4d4
RB
6687 group_el += lnel;
6688 if (! slp
6689 || group_el == group_size)
6690 {
6691 newoff = copy_ssa_name (running_off, NULL);
6692 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6693 running_off, stride_step);
6694 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6695
b17dc4d4
RB
6696 running_off = newoff;
6697 group_el = 0;
6698 }
225ce44b
RB
6699 if (g == group_size - 1
6700 && !slp)
f502d50e
MM
6701 {
6702 if (j == 0 && i == 0)
225ce44b
RB
6703 STMT_VINFO_VEC_STMT (stmt_info)
6704 = *vec_stmt = assign;
f502d50e
MM
6705 else
6706 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6707 prev_stmt_info = vinfo_for_stmt (assign);
6708 }
6709 }
f2e2a985 6710 }
f502d50e 6711 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6712 if (slp)
6713 break;
f2e2a985 6714 }
778dd3b6
RB
6715
6716 vec_oprnds.release ();
f2e2a985
MM
6717 return true;
6718 }
6719
8c681247 6720 auto_vec<tree> dr_chain (group_size);
9771b263 6721 oprnds.create (group_size);
ebfd146a 6722
720f5239 6723 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6724 gcc_assert (alignment_support_scheme);
70088b95
RS
6725 vec_loop_masks *loop_masks
6726 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6727 ? &LOOP_VINFO_MASKS (loop_vinfo)
6728 : NULL);
272c6793 6729 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6730 realignment. vect_supportable_dr_alignment always returns either
6731 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6732 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6733 && !mask
70088b95 6734 && !loop_masks)
272c6793
RS
6735 || alignment_support_scheme == dr_aligned
6736 || alignment_support_scheme == dr_unaligned_supported);
6737
62da9e14
RS
6738 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6739 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6740 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6741
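  /* E.g. for V4SI this makes OFFSET -3 elements: the data reference is
     moved back so that one vector access covers the 4 elements ending
     at the original scalar address, and for VMAT_CONTIGUOUS_REVERSE the
     vector is additionally reversed with a VEC_PERM_EXPR below.  */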
f307441a
RS
6742 tree bump;
6743 tree vec_offset = NULL_TREE;
6744 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6745 {
6746 aggr_type = NULL_TREE;
6747 bump = NULL_TREE;
6748 }
6749 else if (memory_access_type == VMAT_GATHER_SCATTER)
6750 {
6751 aggr_type = elem_type;
6752 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6753 &bump, &vec_offset);
6754 }
272c6793 6755 else
f307441a
RS
6756 {
6757 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6758 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6759 else
6760 aggr_type = vectype;
6761 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6762 }
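  /* E.g. (a sketch, with the increment computed by
     vect_get_data_ptr_increment): for a plain contiguous V4SI store
     AGGR_TYPE is the vector type itself and the pointer advances by
     one vector per copy, while for VMAT_LOAD_STORE_LANES AGGR_TYPE is
     an array of VEC_NUM * NUNITS elements so that a single
     {MASK_,}STORE_LANES call covers the whole interleaved group.  */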
ebfd146a 6763
c3a8f964
RS
6764 if (mask)
6765 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6766
ebfd146a
IR
6767 /* In case the vectorization factor (VF) is bigger than the number
6768 of elements that we can fit in a vectype (nunits), we have to generate
 6769 more than one vector stmt - i.e. - we need to "unroll" the
b8698a0f 6770 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6771 vect_get_vec_def_for_copy_stmt. */
6772
0d0293ac 6773 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6774
6775 S1: &base + 2 = x2
6776 S2: &base = x0
6777 S3: &base + 1 = x1
6778 S4: &base + 3 = x3
6779
6780 We create vectorized stores starting from base address (the access of the
6781 first stmt in the chain (S2 in the above example), when the last store stmt
6782 of the chain (S4) is reached:
6783
6784 VS1: &base = vx2
6785 VS2: &base + vec_size*1 = vx0
6786 VS3: &base + vec_size*2 = vx1
6787 VS4: &base + vec_size*3 = vx3
6788
6789 Then permutation statements are generated:
6790
3fcc1b55
JJ
6791 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6792 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6793 ...
b8698a0f 6794
ebfd146a
IR
6795 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6796 (the order of the data-refs in the output of vect_permute_store_chain
6797 corresponds to the order of scalar stmts in the interleaving chain - see
6798 the documentation of vect_permute_store_chain()).
6799
 6800 In case of both multiple types and interleaving, the above vector stores and
ff802fa1 6801 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6802 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6803 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6804 */
6805
6806 prev_stmt_info = NULL;
c3a8f964 6807 tree vec_mask = NULL_TREE;
ebfd146a
IR
6808 for (j = 0; j < ncopies; j++)
6809 {
ebfd146a
IR
6810
6811 if (j == 0)
6812 {
6813 if (slp)
6814 {
6815 /* Get vectorized arguments for SLP_NODE. */
d092494c 6816 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6817 NULL, slp_node);
ebfd146a 6818
9771b263 6819 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6820 }
6821 else
6822 {
b8698a0f
L
6823 /* For interleaved stores we collect vectorized defs for all the
6824 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6825 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6826 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6827
0d0293ac 6828 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6829 OPRNDS are of size 1. */
b8698a0f 6830 next_stmt = first_stmt;
ebfd146a
IR
6831 for (i = 0; i < group_size; i++)
6832 {
b8698a0f
L
6833 /* Since gaps are not supported for interleaved stores,
6834 GROUP_SIZE is the exact number of stmts in the chain.
 6835 Therefore, NEXT_STMT can't be NULL. In case that
6836 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a 6837 iteration of the loop will be executed. */
c3a8f964 6838 op = vect_get_store_rhs (next_stmt);
81c40241 6839 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6840 dr_chain.quick_push (vec_oprnd);
6841 oprnds.quick_push (vec_oprnd);
e14c1050 6842 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6843 }
c3a8f964
RS
6844 if (mask)
6845 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6846 mask_vectype);
ebfd146a
IR
6847 }
6848
 6849 /* We should have caught mismatched types earlier. */
6850 gcc_assert (useless_type_conversion_p (vectype,
6851 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6852 bool simd_lane_access_p
6853 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6854 if (simd_lane_access_p
6855 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6856 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6857 && integer_zerop (DR_OFFSET (first_dr))
6858 && integer_zerop (DR_INIT (first_dr))
6859 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6860 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6861 {
6862 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6863 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6864 inv_p = false;
74bf76ed 6865 }
f307441a
RS
6866 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6867 {
6868 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6869 &dataref_ptr, &vec_offset);
6870 inv_p = false;
6871 }
74bf76ed
JJ
6872 else
6873 dataref_ptr
6874 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6875 simd_lane_access_p ? loop : NULL,
09dfa495 6876 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
6877 simd_lane_access_p, &inv_p,
6878 NULL_TREE, bump);
a70d6342 6879 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6880 }
b8698a0f 6881 else
ebfd146a 6882 {
b8698a0f
L
6883 /* For interleaved stores we created vectorized defs for all the
6884 defs stored in OPRNDS in the previous iteration (previous copy).
6885 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6886 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6887 next copy.
0d0293ac 6888 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6889 OPRNDS are of size 1. */
6890 for (i = 0; i < group_size; i++)
6891 {
9771b263 6892 op = oprnds[i];
929b4411
RS
6893 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6894 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
6895 dr_chain[i] = vec_oprnd;
6896 oprnds[i] = vec_oprnd;
ebfd146a 6897 }
c3a8f964 6898 if (mask)
929b4411 6899 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
6900 if (dataref_offset)
6901 dataref_offset
f307441a
RS
6902 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6903 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
6904 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6905 vec_offset);
74bf76ed
JJ
6906 else
6907 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 6908 bump);
ebfd146a
IR
6909 }
6910
2de001ee 6911 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 6912 {
272c6793 6913 tree vec_array;
267d3070 6914
3ba4ff41 6915 /* Get an array into which we can store the individual vectors. */
272c6793 6916 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
6917
6918 /* Invalidate the current contents of VEC_ARRAY. This should
6919 become an RTL clobber too, which prevents the vector registers
6920 from being upward-exposed. */
6921 vect_clobber_variable (stmt, gsi, vec_array);
6922
6923 /* Store the individual vectors into the array. */
272c6793 6924 for (i = 0; i < vec_num; i++)
c2d7ab2a 6925 {
9771b263 6926 vec_oprnd = dr_chain[i];
272c6793 6927 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 6928 }
b8698a0f 6929
7cfb4d93 6930 tree final_mask = NULL;
70088b95
RS
6931 if (loop_masks)
6932 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
6933 vectype, j);
7cfb4d93
RS
6934 if (vec_mask)
6935 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6936 vec_mask, gsi);
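	  /* FINAL_MASK now combines the loop mask for copy J (present
	     for fully-masked loops) with the user mask of the masked
	     store, if any; prepare_load_store_mask is assumed to AND
	     the two when both exist.  */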
6937
7e11fc7f 6938 gcall *call;
7cfb4d93 6939 if (final_mask)
7e11fc7f
RS
6940 {
6941 /* Emit:
6942 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6943 VEC_ARRAY). */
6944 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6945 tree alias_ptr = build_int_cst (ref_type, align);
6946 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6947 dataref_ptr, alias_ptr,
7cfb4d93 6948 final_mask, vec_array);
7e11fc7f
RS
6949 }
6950 else
6951 {
6952 /* Emit:
6953 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6954 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6955 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6956 vec_array);
6957 gimple_call_set_lhs (call, data_ref);
6958 }
a844293d
RS
6959 gimple_call_set_nothrow (call, true);
6960 new_stmt = call;
267d3070 6961 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41
RS
6962
6963 /* Record that VEC_ARRAY is now dead. */
6964 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
6965 }
6966 else
6967 {
6968 new_stmt = NULL;
0d0293ac 6969 if (grouped_store)
272c6793 6970 {
b6b9227d
JJ
6971 if (j == 0)
6972 result_chain.create (group_size);
272c6793
RS
6973 /* Permute. */
6974 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6975 &result_chain);
6976 }
c2d7ab2a 6977
272c6793
RS
6978 next_stmt = first_stmt;
6979 for (i = 0; i < vec_num; i++)
6980 {
644ffefd 6981 unsigned align, misalign;
272c6793 6982
7cfb4d93 6983 tree final_mask = NULL_TREE;
70088b95
RS
6984 if (loop_masks)
6985 final_mask = vect_get_loop_mask (gsi, loop_masks,
6986 vec_num * ncopies,
7cfb4d93
RS
6987 vectype, vec_num * j + i);
6988 if (vec_mask)
6989 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6990 vec_mask, gsi);
6991
f307441a
RS
6992 if (memory_access_type == VMAT_GATHER_SCATTER)
6993 {
6994 tree scale = size_int (gs_info.scale);
6995 gcall *call;
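		  /* Emit:
		       MASK_SCATTER_STORE (DATAREF_PTR, VEC_OFFSET, SCALE,
					   VEC_OPRND, FINAL_MASK)
		     when loop masks are in use, else:
		       SCATTER_STORE (DATAREF_PTR, VEC_OFFSET, SCALE,
				      VEC_OPRND).  */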
70088b95 6996 if (loop_masks)
f307441a
RS
6997 call = gimple_build_call_internal
6998 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
6999 scale, vec_oprnd, final_mask);
7000 else
7001 call = gimple_build_call_internal
7002 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7003 scale, vec_oprnd);
7004 gimple_call_set_nothrow (call, true);
7005 new_stmt = call;
7006 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7007 break;
7008 }
7009
272c6793
RS
7010 if (i > 0)
7011 /* Bump the vector pointer. */
7012 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7013 stmt, bump);
272c6793
RS
7014
7015 if (slp)
9771b263 7016 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7017 else if (grouped_store)
7018 /* For grouped stores vectorized defs are interleaved in
272c6793 7019 vect_permute_store_chain(). */
9771b263 7020 vec_oprnd = result_chain[i];
272c6793 7021
f702e7d4 7022 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7023 if (aligned_access_p (first_dr))
644ffefd 7024 misalign = 0;
272c6793
RS
7025 else if (DR_MISALIGNMENT (first_dr) == -1)
7026 {
25f68d90 7027 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7028 misalign = 0;
272c6793
RS
7029 }
7030 else
c3a8f964 7031 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7032 if (dataref_offset == NULL_TREE
7033 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7034 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7035 misalign);
c2d7ab2a 7036
62da9e14 7037 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7038 {
7039 tree perm_mask = perm_mask_for_reverse (vectype);
7040 tree perm_dest
c3a8f964 7041 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7042 vectype);
b731b390 7043 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7044
7045 /* Generate the permute statement. */
355fe088 7046 gimple *perm_stmt
0d0e4a03
JJ
7047 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7048 vec_oprnd, perm_mask);
09dfa495
BM
7049 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7050
7051 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7052 vec_oprnd = new_temp;
7053 }
7054
272c6793 7055 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7056 if (final_mask)
c3a8f964
RS
7057 {
7058 align = least_bit_hwi (misalign | align);
7059 tree ptr = build_int_cst (ref_type, align);
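	      /* Emit:
		   MASK_STORE (DATAREF_PTR, PTR, FINAL_MASK, VEC_OPRND).  */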
7060 gcall *call
7061 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7062 dataref_ptr, ptr,
7cfb4d93 7063 final_mask, vec_oprnd);
c3a8f964
RS
7064 gimple_call_set_nothrow (call, true);
7065 new_stmt = call;
7066 }
7067 else
7068 {
7069 data_ref = fold_build2 (MEM_REF, vectype,
7070 dataref_ptr,
7071 dataref_offset
7072 ? dataref_offset
7073 : build_int_cst (ref_type, 0));
7074 if (aligned_access_p (first_dr))
7075 ;
7076 else if (DR_MISALIGNMENT (first_dr) == -1)
7077 TREE_TYPE (data_ref)
7078 = build_aligned_type (TREE_TYPE (data_ref),
7079 align * BITS_PER_UNIT);
7080 else
7081 TREE_TYPE (data_ref)
7082 = build_aligned_type (TREE_TYPE (data_ref),
7083 TYPE_ALIGN (elem_type));
19986382 7084 vect_copy_ref_info (data_ref, DR_REF (first_dr));
c3a8f964
RS
7085 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7086 }
272c6793 7087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
7088
7089 if (slp)
7090 continue;
7091
e14c1050 7092 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
7093 if (!next_stmt)
7094 break;
7095 }
ebfd146a 7096 }
1da0876c
RS
7097 if (!slp)
7098 {
7099 if (j == 0)
7100 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7101 else
7102 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7103 prev_stmt_info = vinfo_for_stmt (new_stmt);
7104 }
ebfd146a
IR
7105 }
7106
9771b263
DN
7107 oprnds.release ();
7108 result_chain.release ();
7109 vec_oprnds.release ();
ebfd146a
IR
7110
7111 return true;
7112}
7113
557be5a8
AL
7114/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7115 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7116 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7117 vect_gen_perm_mask_checked. */
a1e53f3f 7118
3fcc1b55 7119tree
4aae3cb3 7120vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7121{
b00cb3bf 7122 tree mask_type;
a1e53f3f 7123
0ecc2b7d
RS
7124 poly_uint64 nunits = sel.length ();
7125 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7126
7127 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7128 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7129}
7130
7ac7e286 7131/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7132 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7133
7134tree
4aae3cb3 7135vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7136{
7ac7e286 7137 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7138 return vect_gen_perm_mask_any (vectype, sel);
7139}
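
/* For example (illustration only): perm_mask_for_reverse builds the
   selector {nunits-1, ..., 1, 0} with a vec_perm_builder and obtains
   the VEC_PERM_EXPR mask used to reverse a vector through this
   function.  */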
7140
aec7ae7d
JJ
7141/* Given a vector variable X and Y, that was generated for the scalar
7142 STMT, generate instructions to permute the vector elements of X and Y
7143 using permutation mask MASK_VEC, insert them at *GSI and return the
7144 permuted vector variable. */
a1e53f3f
L
7145
7146static tree
355fe088 7147permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7148 gimple_stmt_iterator *gsi)
a1e53f3f
L
7149{
7150 tree vectype = TREE_TYPE (x);
aec7ae7d 7151 tree perm_dest, data_ref;
355fe088 7152 gimple *perm_stmt;
a1e53f3f 7153
7ad429a4
RS
7154 tree scalar_dest = gimple_get_lhs (stmt);
7155 if (TREE_CODE (scalar_dest) == SSA_NAME)
7156 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7157 else
7158 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7159 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7160
7161 /* Generate the permute statement. */
0d0e4a03 7162 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7163 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7164
7165 return data_ref;
7166}
7167
6b916b36
RB
7168/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 7169 inserting them on the loop's preheader edge. Returns true if we
 7170 were successful in doing so (and thus STMT can then be moved),
7171 otherwise returns false. */
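
/* For example (illustration only): if STMT is  _1 = _2 + _3,  where
   _2 = _4 * 2  is defined inside LOOP and _4 is defined outside of it,
   the definition of _2 is moved to the preheader.  If the definition of
   _2 itself used another value defined inside LOOP, we would give up
   instead, since we do not recurse.  */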
7172
7173static bool
355fe088 7174hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7175{
7176 ssa_op_iter i;
7177 tree op;
7178 bool any = false;
7179
7180 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7181 {
355fe088 7182 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7183 if (!gimple_nop_p (def_stmt)
7184 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7185 {
7186 /* Make sure we don't need to recurse. While we could do
 7187 so in simple cases, when there are more complex use webs
7188 we don't have an easy way to preserve stmt order to fulfil
7189 dependencies within them. */
7190 tree op2;
7191 ssa_op_iter i2;
d1417442
JJ
7192 if (gimple_code (def_stmt) == GIMPLE_PHI)
7193 return false;
6b916b36
RB
7194 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7195 {
355fe088 7196 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7197 if (!gimple_nop_p (def_stmt2)
7198 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7199 return false;
7200 }
7201 any = true;
7202 }
7203 }
7204
7205 if (!any)
7206 return true;
7207
7208 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7209 {
355fe088 7210 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7211 if (!gimple_nop_p (def_stmt)
7212 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7213 {
7214 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7215 gsi_remove (&gsi, false);
7216 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7217 }
7218 }
7219
7220 return true;
7221}
7222
ebfd146a
IR
7223/* vectorizable_load.
7224
b8698a0f
L
 7225 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7226 can be vectorized.
7227 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
7228 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7230
7231static bool
355fe088 7232vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 7233 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
7234{
7235 tree scalar_dest;
7236 tree vec_dest = NULL;
7237 tree data_ref = NULL;
7238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7239 stmt_vec_info prev_stmt_info;
ebfd146a 7240 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7241 struct loop *loop = NULL;
ebfd146a 7242 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7243 bool nested_in_vect_loop = false;
c716e67f 7244 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7245 tree elem_type;
ebfd146a 7246 tree new_temp;
ef4bddc2 7247 machine_mode mode;
355fe088 7248 gimple *new_stmt = NULL;
ebfd146a
IR
7249 tree dummy;
7250 enum dr_alignment_support alignment_support_scheme;
7251 tree dataref_ptr = NULL_TREE;
74bf76ed 7252 tree dataref_offset = NULL_TREE;
355fe088 7253 gimple *ptr_incr = NULL;
ebfd146a 7254 int ncopies;
4d694b27
RS
7255 int i, j;
7256 unsigned int group_size;
7257 poly_uint64 group_gap_adj;
ebfd146a
IR
7258 tree msq = NULL_TREE, lsq;
7259 tree offset = NULL_TREE;
356bbc4c 7260 tree byte_offset = NULL_TREE;
ebfd146a 7261 tree realignment_token = NULL_TREE;
538dd0b7 7262 gphi *phi = NULL;
6e1aa848 7263 vec<tree> dr_chain = vNULL;
0d0293ac 7264 bool grouped_load = false;
355fe088 7265 gimple *first_stmt;
4f0a0218 7266 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
7267 bool inv_p;
7268 bool compute_in_loop = false;
7269 struct loop *at_loop;
7270 int vec_num;
7271 bool slp = (slp_node != NULL);
7272 bool slp_perm = false;
a70d6342 7273 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7274 poly_uint64 vf;
272c6793 7275 tree aggr_type;
134c85ca 7276 gather_scatter_info gs_info;
310213d4 7277 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7278 tree ref_type;
929b4411 7279 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7280
465c8c19
JJ
7281 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7282 return false;
7283
66c16fd9
RB
7284 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7285 && ! vec_stmt)
465c8c19
JJ
7286 return false;
7287
c3a8f964
RS
7288 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7289 if (is_gimple_assign (stmt))
7290 {
7291 scalar_dest = gimple_assign_lhs (stmt);
7292 if (TREE_CODE (scalar_dest) != SSA_NAME)
7293 return false;
465c8c19 7294
c3a8f964
RS
7295 tree_code code = gimple_assign_rhs_code (stmt);
7296 if (code != ARRAY_REF
7297 && code != BIT_FIELD_REF
7298 && code != INDIRECT_REF
7299 && code != COMPONENT_REF
7300 && code != IMAGPART_EXPR
7301 && code != REALPART_EXPR
7302 && code != MEM_REF
7303 && TREE_CODE_CLASS (code) != tcc_declaration)
7304 return false;
7305 }
7306 else
7307 {
7308 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7309 if (!call || !gimple_call_internal_p (call))
7310 return false;
7311
7312 internal_fn ifn = gimple_call_internal_fn (call);
7313 if (!internal_load_fn_p (ifn))
c3a8f964 7314 return false;
465c8c19 7315
c3a8f964
RS
7316 scalar_dest = gimple_call_lhs (call);
7317 if (!scalar_dest)
7318 return false;
7319
7320 if (slp_node != NULL)
7321 {
7322 if (dump_enabled_p ())
7323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7324 "SLP of masked loads not supported.\n");
7325 return false;
7326 }
7327
bfaa08b7
RS
7328 int mask_index = internal_fn_mask_index (ifn);
7329 if (mask_index >= 0)
7330 {
7331 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7332 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7333 &mask_vectype))
bfaa08b7
RS
7334 return false;
7335 }
c3a8f964 7336 }
465c8c19
JJ
7337
7338 if (!STMT_VINFO_DATA_REF (stmt_info))
7339 return false;
7340
7341 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7342 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7343
a70d6342
IR
7344 if (loop_vinfo)
7345 {
7346 loop = LOOP_VINFO_LOOP (loop_vinfo);
7347 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7348 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7349 }
7350 else
3533e503 7351 vf = 1;
ebfd146a
IR
7352
7353 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7354 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7355 case of SLP. */
fce57248 7356 if (slp)
ebfd146a
IR
7357 ncopies = 1;
7358 else
e8f142e2 7359 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7360
7361 gcc_assert (ncopies >= 1);
7362
7363 /* FORNOW. This restriction should be relaxed. */
7364 if (nested_in_vect_loop && ncopies > 1)
7365 {
73fbfcad 7366 if (dump_enabled_p ())
78c60e3d 7367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7368 "multiple types in nested loop.\n");
ebfd146a
IR
7369 return false;
7370 }
7371
f2556b68
RB
7372 /* Invalidate assumptions made by dependence analysis when vectorization
7373 on the unrolled body effectively re-orders stmts. */
7374 if (ncopies > 1
7375 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7376 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7377 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7378 {
7379 if (dump_enabled_p ())
7380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7381 "cannot perform implicit CSE when unrolling "
7382 "with negative dependence distance\n");
7383 return false;
7384 }
7385
7b7b1813 7386 elem_type = TREE_TYPE (vectype);
947131ba 7387 mode = TYPE_MODE (vectype);
ebfd146a
IR
7388
7389 /* FORNOW. In some cases can vectorize even if data-type not supported
7390 (e.g. - data copies). */
947131ba 7391 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7392 {
73fbfcad 7393 if (dump_enabled_p ())
78c60e3d 7394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7395 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7396 return false;
7397 }
7398
ebfd146a 7399 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7400 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7401 {
0d0293ac 7402 grouped_load = true;
ebfd146a 7403 /* FORNOW */
2de001ee
RS
7404 gcc_assert (!nested_in_vect_loop);
7405 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7406
e14c1050 7407 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 7408 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7409
b1af7da6
RB
7410 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7411 slp_perm = true;
7412
f2556b68
RB
7413 /* Invalidate assumptions made by dependence analysis when vectorization
7414 on the unrolled body effectively re-orders stmts. */
7415 if (!PURE_SLP_STMT (stmt_info)
7416 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7417 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7418 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7419 {
7420 if (dump_enabled_p ())
7421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7422 "cannot perform implicit CSE when performing "
7423 "group loads with negative dependence distance\n");
7424 return false;
7425 }
96bb56b2
RB
7426
 7427 /* Similarly, when the stmt is a load that is both part of an SLP
 7428 instance and a loop vectorized stmt via the same-dr mechanism,
7429 we have to give up. */
7430 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7431 && (STMT_SLP_TYPE (stmt_info)
7432 != STMT_SLP_TYPE (vinfo_for_stmt
7433 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7434 {
7435 if (dump_enabled_p ())
7436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7437 "conflicting SLP types for CSEd load\n");
7438 return false;
7439 }
ebfd146a 7440 }
7cfb4d93
RS
7441 else
7442 group_size = 1;
ebfd146a 7443
2de001ee 7444 vect_memory_access_type memory_access_type;
7e11fc7f 7445 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7446 &memory_access_type, &gs_info))
7447 return false;
a1e53f3f 7448
c3a8f964
RS
7449 if (mask)
7450 {
7451 if (memory_access_type == VMAT_CONTIGUOUS)
7452 {
7e11fc7f
RS
7453 machine_mode vec_mode = TYPE_MODE (vectype);
7454 if (!VECTOR_MODE_P (vec_mode)
7455 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7456 TYPE_MODE (mask_vectype), true))
7457 return false;
7458 }
bfaa08b7 7459 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7460 {
7461 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7462 tree masktype
7463 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7464 if (TREE_CODE (masktype) == INTEGER_TYPE)
7465 {
7466 if (dump_enabled_p ())
7467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7468 "masked gather with integer mask not"
7469 " supported.");
7470 return false;
7471 }
7472 }
bfaa08b7
RS
7473 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7474 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7475 {
7476 if (dump_enabled_p ())
7477 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7478 "unsupported access type for masked load.\n");
7479 return false;
7480 }
7481 }
7482
ebfd146a
IR
7483 if (!vec_stmt) /* transformation not required. */
7484 {
2de001ee
RS
7485 if (!slp)
7486 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7487
7488 if (loop_vinfo
7489 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7490 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7491 memory_access_type, &gs_info);
7cfb4d93 7492
ebfd146a 7493 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c 7494 /* The SLP costs are calculated during SLP analysis. */
78604de0 7495 if (! slp_node)
2de001ee 7496 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2e8ab70c 7497 NULL, NULL, NULL);
ebfd146a
IR
7498 return true;
7499 }
7500
2de001ee
RS
7501 if (!slp)
7502 gcc_assert (memory_access_type
7503 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7504
73fbfcad 7505 if (dump_enabled_p ())
78c60e3d 7506 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7507 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7508
67b8dbac 7509 /* Transform. */
ebfd146a 7510
f702e7d4 7511 ensure_base_align (dr);
c716e67f 7512
bfaa08b7 7513 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7514 {
929b4411
RS
7515 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7516 mask_dt);
aec7ae7d
JJ
7517 return true;
7518 }
2de001ee
RS
7519
7520 if (memory_access_type == VMAT_ELEMENTWISE
7521 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7522 {
7523 gimple_stmt_iterator incr_gsi;
7524 bool insert_after;
355fe088 7525 gimple *incr;
7d75abc8 7526 tree offvar;
7d75abc8
MM
7527 tree ivstep;
7528 tree running_off;
9771b263 7529 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7530 tree stride_base, stride_step, alias_off;
4d694b27
RS
7531 /* Checked by get_load_store_type. */
7532 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7533 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7534
7cfb4d93 7535 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7536 gcc_assert (!nested_in_vect_loop);
7d75abc8 7537
b210f45f 7538 if (grouped_load)
44fc7854
BE
7539 {
7540 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7541 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
44fc7854 7542 }
ab313a8c 7543 else
44fc7854
BE
7544 {
7545 first_stmt = stmt;
7546 first_dr = dr;
b210f45f
RB
7547 }
7548 if (slp && grouped_load)
7549 {
7550 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7551 ref_type = get_group_alias_ptr_type (first_stmt);
7552 }
7553 else
7554 {
7555 if (grouped_load)
7556 cst_offset
7557 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7558 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
44fc7854 7559 group_size = 1;
b210f45f 7560 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7561 }
ab313a8c 7562
14ac6aa2
RB
7563 stride_base
7564 = fold_build_pointer_plus
ab313a8c 7565 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7566 size_binop (PLUS_EXPR,
ab313a8c
RB
7567 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7568 convert_to_ptrofftype (DR_INIT (first_dr))));
7569 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7570
7571 /* For a load with loop-invariant (but other than power-of-2)
7572 stride (i.e. not a grouped access) like so:
7573
7574 for (i = 0; i < n; i += stride)
7575 ... = array[i];
7576
7577 we generate a new induction variable and new accesses to
7578 form a new vector (or vectors, depending on ncopies):
7579
7580 for (j = 0; ; j += VF*stride)
7581 tmp1 = array[j];
7582 tmp2 = array[j + stride];
7583 ...
7584 vectemp = {tmp1, tmp2, ...}
7585 */
7586
ab313a8c
RB
7587 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7588 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7589
7590 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7591
b210f45f
RB
7592 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7593 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7594 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7595 loop, &incr_gsi, insert_after,
7596 &offvar, NULL);
7597 incr = gsi_stmt (incr_gsi);
310213d4 7598 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7599
b210f45f 7600 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7601
7602 prev_stmt_info = NULL;
7603 running_off = offvar;
44fc7854 7604 alias_off = build_int_cst (ref_type, 0);
4d694b27 7605 int nloads = const_nunits;
e09b4c37 7606 int lnel = 1;
7b5fc413 7607 tree ltype = TREE_TYPE (vectype);
ea60dd34 7608 tree lvectype = vectype;
b266b968 7609 auto_vec<tree> dr_chain;
2de001ee 7610 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7611 {
4d694b27 7612 if (group_size < const_nunits)
e09b4c37 7613 {
ff03930a
JJ
7614 /* First check if vec_init optab supports construction from
7615 vector elts directly. */
b397965c 7616 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7617 machine_mode vmode;
7618 if (mode_for_vector (elmode, group_size).exists (&vmode)
7619 && VECTOR_MODE_P (vmode)
414fef4e 7620 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7621 && (convert_optab_handler (vec_init_optab,
7622 TYPE_MODE (vectype), vmode)
7623 != CODE_FOR_nothing))
ea60dd34 7624 {
4d694b27 7625 nloads = const_nunits / group_size;
ea60dd34 7626 lnel = group_size;
ff03930a
JJ
7627 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7628 }
7629 else
7630 {
7631 /* Otherwise avoid emitting a constructor of vector elements
7632 by performing the loads using an integer type of the same
7633 size, constructing a vector of those and then
7634 re-interpreting it as the original vector type.
7635 This avoids a huge runtime penalty due to the general
7636 inability to perform store forwarding from smaller stores
7637 to a larger load. */
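		  /* E.g. (illustrative): when loading groups of 4 chars
		     into a V16QI vector, LSIZE is 32, so four SImode
		     loads are emitted, a V4SI is built from them and the
		     result is view-converted back to V16QI.  */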
7638 unsigned lsize
7639 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7640 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7641 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7642 /* If we can't construct such a vector fall back to
7643 element loads of the original vector type. */
4d694b27 7644 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7645 && VECTOR_MODE_P (vmode)
414fef4e 7646 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7647 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7648 != CODE_FOR_nothing))
7649 {
4d694b27 7650 nloads = lnunits;
ff03930a
JJ
7651 lnel = group_size;
7652 ltype = build_nonstandard_integer_type (lsize, 1);
7653 lvectype = build_vector_type (ltype, nloads);
7654 }
ea60dd34 7655 }
e09b4c37 7656 }
2de001ee 7657 else
e09b4c37 7658 {
ea60dd34 7659 nloads = 1;
4d694b27 7660 lnel = const_nunits;
e09b4c37 7661 ltype = vectype;
e09b4c37 7662 }
2de001ee
RS
7663 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7664 }
7665 if (slp)
7666 {
66c16fd9
RB
7667 /* For SLP permutation support we need to load the whole group,
 7668 not only as many vector stmts as the permutation result
7669 fits in. */
b266b968 7670 if (slp_perm)
66c16fd9 7671 {
d9f21f6a
RS
7672 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7673 variable VF. */
7674 unsigned int const_vf = vf.to_constant ();
4d694b27 7675 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7676 dr_chain.create (ncopies);
7677 }
7678 else
7679 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7680 }
4d694b27 7681 unsigned int group_el = 0;
e09b4c37
RB
7682 unsigned HOST_WIDE_INT
7683 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7684 for (j = 0; j < ncopies; j++)
7685 {
7b5fc413 7686 if (nloads > 1)
e09b4c37
RB
7687 vec_alloc (v, nloads);
7688 for (i = 0; i < nloads; i++)
7b5fc413 7689 {
e09b4c37 7690 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7691 group_el * elsz + cst_offset);
19986382
RB
7692 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7693 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7694 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
e09b4c37
RB
7695 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7696 if (nloads > 1)
7697 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7698 gimple_assign_lhs (new_stmt));
7699
7700 group_el += lnel;
7701 if (! slp
7702 || group_el == group_size)
7b5fc413 7703 {
e09b4c37
RB
7704 tree newoff = copy_ssa_name (running_off);
7705 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7706 running_off, stride_step);
7b5fc413
RB
7707 vect_finish_stmt_generation (stmt, incr, gsi);
7708
7709 running_off = newoff;
e09b4c37 7710 group_el = 0;
7b5fc413 7711 }
7b5fc413 7712 }
e09b4c37 7713 if (nloads > 1)
7d75abc8 7714 {
ea60dd34
RB
7715 tree vec_inv = build_constructor (lvectype, v);
7716 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7717 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7718 if (lvectype != vectype)
7719 {
7720 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7721 VIEW_CONVERT_EXPR,
7722 build1 (VIEW_CONVERT_EXPR,
7723 vectype, new_temp));
7724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7725 }
7d75abc8
MM
7726 }
7727
7b5fc413 7728 if (slp)
b266b968 7729 {
b266b968
RB
7730 if (slp_perm)
7731 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7732 else
7733 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7734 }
7d75abc8 7735 else
225ce44b
RB
7736 {
7737 if (j == 0)
7738 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7739 else
7740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7741 prev_stmt_info = vinfo_for_stmt (new_stmt);
7742 }
7d75abc8 7743 }
b266b968 7744 if (slp_perm)
29afecdf
RB
7745 {
7746 unsigned n_perms;
7747 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7748 slp_node_instance, false, &n_perms);
7749 }
7d75abc8
MM
7750 return true;
7751 }
aec7ae7d 7752
b5ec4de7
RS
7753 if (memory_access_type == VMAT_GATHER_SCATTER
7754 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7755 grouped_load = false;
7756
0d0293ac 7757 if (grouped_load)
ebfd146a 7758 {
e14c1050 7759 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7760 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7761 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7762 without permutation. */
7763 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7764 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7765 /* For BB vectorization always use the first stmt to base
7766 the data ref pointer on. */
7767 if (bb_vinfo)
7768 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7769
ebfd146a 7770 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7771 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7772 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7773 ??? But we can only do so if there is exactly one
7774 as we have no way to get at the rest. Leave the CSE
7775 opportunity alone.
7776 ??? With the group load eventually participating
7777 in multiple different permutations (having multiple
7778 slp nodes which refer to the same group) the CSE
7779 is even wrong code. See PR56270. */
7780 && !slp)
ebfd146a
IR
7781 {
7782 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7783 return true;
7784 }
7785 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7786 group_gap_adj = 0;
ebfd146a
IR
7787
7788 /* VEC_NUM is the number of vect stmts to be created for this group. */
7789 if (slp)
7790 {
0d0293ac 7791 grouped_load = false;
91ff1504
RB
7792 /* For SLP permutation support we need to load the whole group,
 7793 not only as many vector stmts as the permutation result
7794 fits in. */
7795 if (slp_perm)
b267968e 7796 {
d9f21f6a
RS
7797 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7798 variable VF. */
7799 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7800 unsigned int const_nunits = nunits.to_constant ();
7801 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7802 group_gap_adj = vf * group_size - nunits * vec_num;
7803 }
91ff1504 7804 else
b267968e
RB
7805 {
7806 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7807 group_gap_adj
7808 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7809 }
a70d6342 7810 }
ebfd146a 7811 else
9b999e8c 7812 vec_num = group_size;
44fc7854
BE
7813
7814 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7815 }
7816 else
7817 {
7818 first_stmt = stmt;
7819 first_dr = dr;
7820 group_size = vec_num = 1;
9b999e8c 7821 group_gap_adj = 0;
44fc7854 7822 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7823 }
7824
720f5239 7825 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7826 gcc_assert (alignment_support_scheme);
70088b95
RS
7827 vec_loop_masks *loop_masks
7828 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7829 ? &LOOP_VINFO_MASKS (loop_vinfo)
7830 : NULL);
7cfb4d93
RS
 7831 /* Targets with load-lane instructions must not require explicit
7832 realignment. vect_supportable_dr_alignment always returns either
7833 dr_aligned or dr_unaligned_supported for masked operations. */
7834 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7835 && !mask
70088b95 7836 && !loop_masks)
272c6793
RS
7837 || alignment_support_scheme == dr_aligned
7838 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7839
7840 /* In case the vectorization factor (VF) is bigger than the number
7841 of elements that we can fit in a vectype (nunits), we have to generate
7842 more than one vector stmt - i.e., we need to "unroll" the
ff802fa1 7843 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7844 from one copy of the vector stmt to the next, in the field
ff802fa1 7845 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7846 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7847 stmts that use the defs of the current stmt. The example below
7848 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7849 need to create 4 vectorized stmts):
ebfd146a
IR
7850
7851 before vectorization:
7852 RELATED_STMT VEC_STMT
7853 S1: x = memref - -
7854 S2: z = x + 1 - -
7855
7856 step 1: vectorize stmt S1:
7857 We first create the vector stmt VS1_0, and, as usual, record a
7858 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7859 Next, we create the vector stmt VS1_1, and record a pointer to
7860 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7861 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7862 stmts and pointers:
7863 RELATED_STMT VEC_STMT
7864 VS1_0: vx0 = memref0 VS1_1 -
7865 VS1_1: vx1 = memref1 VS1_2 -
7866 VS1_2: vx2 = memref2 VS1_3 -
7867 VS1_3: vx3 = memref3 - -
7868 S1: x = load - VS1_0
7869 S2: z = x + 1 - -
7870
b8698a0f
L
7871 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7872 information we recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
7873 stmt S2. */
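/* (Illustration only, not from the original sources: with V4SI
vectors, VF=16 and int elements, a scalar loop such as

for (i = 0; i < n; i++)
z[i] = x[i] + 1;

is "unrolled" into the four vector loads VS1_0..VS1_3 above; step 2
then creates four vector adds whose operands are found by walking
the RELATED_STMT chain starting at VS1_0.) */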
7874
0d0293ac 7875 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7876
7877 S1: x2 = &base + 2
7878 S2: x0 = &base
7879 S3: x1 = &base + 1
7880 S4: x3 = &base + 3
7881
b8698a0f 7882 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7883 starting from the access of the first stmt of the chain:
7884
7885 VS1: vx0 = &base
7886 VS2: vx1 = &base + vec_size*1
7887 VS3: vx2 = &base + vec_size*2
7888 VS4: vx3 = &base + vec_size*3
7889
7890 Then permutation statements are generated:
7891
e2c83630
RH
7892 VS5: vx4 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7893 VS6: vx5 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7894 ...
7895
7896 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7897 (the order of the data-refs in the output of vect_permute_load_chain
7898 corresponds to the order of scalar stmts in the interleaving chain - see
7899 the documentation of vect_permute_load_chain()).
7900 The generation of permutation stmts and recording them in
0d0293ac 7901 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7902
b8698a0f 7903 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
7904 permutation stmts above are created for every copy. The result vector
7905 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7906 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
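/* (Illustration only, not from the original sources: a group of
interleaved loads of size 2, e.g.

for (i = 0; i < n; i++)
{
even[i] = b[2*i];
odd[i] = b[2*i+1];
}

loads the b[] chain with contiguous vector loads as in VS1/VS2 above
and then separates the even and odd elements with the VEC_PERM_EXPRs
VS5/VS6.) */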
ebfd146a
IR
7907
7908 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7909 on a target that supports unaligned accesses (dr_unaligned_supported)
7910 we generate the following code:
7911 p = initial_addr;
7912 indx = 0;
7913 loop {
7914 p = p + indx * vectype_size;
7915 vec_dest = *(p);
7916 indx = indx + 1;
7917 }
7918
7919 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7920 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
7921 then generate the following code, in which the data in each iteration is
7922 obtained by two vector loads, one from the previous iteration, and one
7923 from the current iteration:
7924 p1 = initial_addr;
7925 msq_init = *(floor(p1))
7926 p2 = initial_addr + VS - 1;
7927 realignment_token = call target_builtin;
7928 indx = 0;
7929 loop {
7930 p2 = p2 + indx * vectype_size
7931 lsq = *(floor(p2))
7932 vec_dest = realign_load (msq, lsq, realignment_token)
7933 indx = indx + 1;
7934 msq = lsq;
7935 } */
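/* (Sketch, not from the original sources: in element terms the
realign_load above behaves like

vec_dest[k] = concat (msq, lsq)[misalign + k], k = 0 .. nunits-1

i.e. it selects a window of nunits elements from the concatenation
of the two aligned loads, steered by realignment_token; on targets
such as classic Altivec this maps to a permute whose control vector
comes from lvsl/lvsr.) */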
7936
7937 /* If the misalignment remains the same throughout the execution of the
7938 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7939 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
7940 This can only occur when vectorizing memory accesses in the inner-loop
7941 nested within an outer-loop that is being vectorized. */
7942
d1e4b493 7943 if (nested_in_vect_loop
cf098191
RS
7944 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7945 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
7946 {
7947 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7948 compute_in_loop = true;
7949 }
7950
7951 if ((alignment_support_scheme == dr_explicit_realign_optimized
7952 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7953 && !compute_in_loop)
ebfd146a
IR
7954 {
7955 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7956 alignment_support_scheme, NULL_TREE,
7957 &at_loop);
7958 if (alignment_support_scheme == dr_explicit_realign_optimized)
7959 {
538dd0b7 7960 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
7961 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7962 size_one_node);
ebfd146a
IR
7963 }
7964 }
7965 else
7966 at_loop = loop;
7967
62da9e14 7968 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
7969 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7970
ab2fc782
RS
7971 tree bump;
7972 tree vec_offset = NULL_TREE;
7973 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7974 {
7975 aggr_type = NULL_TREE;
7976 bump = NULL_TREE;
7977 }
7978 else if (memory_access_type == VMAT_GATHER_SCATTER)
7979 {
7980 aggr_type = elem_type;
7981 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7982 &bump, &vec_offset);
7983 }
272c6793 7984 else
ab2fc782
RS
7985 {
7986 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7987 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7988 else
7989 aggr_type = vectype;
7990 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
7991 }
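/* At this point AGGR_TYPE is the type by which the data-ref pointer is
advanced per copy (an array of vectors for load/store-lanes, a single
vector otherwise) and BUMP is the matching increment; both stay null
for true gather/scatter accesses, which use a vector of offsets
instead of a moving pointer. */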
272c6793 7992
c3a8f964 7993 tree vec_mask = NULL_TREE;
ebfd146a 7994 prev_stmt_info = NULL;
4d694b27 7995 poly_uint64 group_elt = 0;
ebfd146a 7996 for (j = 0; j < ncopies; j++)
b8698a0f 7997 {
272c6793 7998 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7999 if (j == 0)
74bf76ed
JJ
8000 {
8001 bool simd_lane_access_p
8002 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8003 if (simd_lane_access_p
8004 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8005 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8006 && integer_zerop (DR_OFFSET (first_dr))
8007 && integer_zerop (DR_INIT (first_dr))
8008 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8009 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8010 && (alignment_support_scheme == dr_aligned
8011 || alignment_support_scheme == dr_unaligned_supported))
8012 {
8013 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8014 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8015 inv_p = false;
74bf76ed 8016 }
4f0a0218
RB
8017 else if (first_stmt_for_drptr
8018 && first_stmt != first_stmt_for_drptr)
8019 {
8020 dataref_ptr
8021 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8022 at_loop, offset, &dummy, gsi,
8023 &ptr_incr, simd_lane_access_p,
ab2fc782 8024 &inv_p, byte_offset, bump);
4f0a0218
RB
8025 /* Adjust the pointer by the difference to first_stmt. */
8026 data_reference_p ptrdr
8027 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8028 tree diff = fold_convert (sizetype,
8029 size_binop (MINUS_EXPR,
8030 DR_INIT (first_dr),
8031 DR_INIT (ptrdr)));
8032 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8033 stmt, diff);
8034 }
bfaa08b7
RS
8035 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8036 {
8037 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8038 &dataref_ptr, &vec_offset);
8039 inv_p = false;
8040 }
74bf76ed
JJ
8041 else
8042 dataref_ptr
8043 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8044 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8045 simd_lane_access_p, &inv_p,
ab2fc782 8046 byte_offset, bump);
c3a8f964
RS
8047 if (mask)
8048 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8049 mask_vectype);
74bf76ed 8050 }
ebfd146a 8051 else
c3a8f964
RS
8052 {
8053 if (dataref_offset)
8054 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8055 bump);
bfaa08b7 8056 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8057 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8058 vec_offset);
c3a8f964 8059 else
ab2fc782
RS
8060 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8061 stmt, bump);
c3a8f964 8062 if (mask)
929b4411 8063 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8064 }
ebfd146a 8065
0d0293ac 8066 if (grouped_load || slp_perm)
9771b263 8067 dr_chain.create (vec_num);
5ce1ee7f 8068
2de001ee 8069 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8070 {
272c6793
RS
8071 tree vec_array;
8072
8073 vec_array = create_vector_array (vectype, vec_num);
8074
7cfb4d93 8075 tree final_mask = NULL_TREE;
70088b95
RS
8076 if (loop_masks)
8077 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8078 vectype, j);
7cfb4d93
RS
8079 if (vec_mask)
8080 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8081 vec_mask, gsi);
8082
7e11fc7f 8083 gcall *call;
7cfb4d93 8084 if (final_mask)
7e11fc7f
RS
8085 {
8086 /* Emit:
8087 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8088 VEC_MASK). */
8089 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8090 tree alias_ptr = build_int_cst (ref_type, align);
8091 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8092 dataref_ptr, alias_ptr,
7cfb4d93 8093 final_mask);
7e11fc7f
RS
8094 }
8095 else
8096 {
8097 /* Emit:
8098 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8099 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8100 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8101 }
a844293d
RS
8102 gimple_call_set_lhs (call, vec_array);
8103 gimple_call_set_nothrow (call, true);
8104 new_stmt = call;
272c6793 8105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8106
272c6793
RS
8107 /* Extract each vector into an SSA_NAME. */
8108 for (i = 0; i < vec_num; i++)
ebfd146a 8109 {
272c6793
RS
8110 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8111 vec_array, i);
9771b263 8112 dr_chain.quick_push (new_temp);
272c6793
RS
8113 }
8114
8115 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8116 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8117
8118 /* Record that VEC_ARRAY is now dead. */
8119 vect_clobber_variable (stmt, gsi, vec_array);
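/* (Note, not from the original sources: on targets such as AArch64
this load-lanes path corresponds to instructions like LD2/LD3/LD4,
which load a structure of two to four vectors and de-interleave it
in one go, so no separate permutation statements are needed.) */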
272c6793
RS
8120 }
8121 else
8122 {
8123 for (i = 0; i < vec_num; i++)
8124 {
7cfb4d93 8125 tree final_mask = NULL_TREE;
70088b95 8126 if (loop_masks
7cfb4d93 8127 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8128 final_mask = vect_get_loop_mask (gsi, loop_masks,
8129 vec_num * ncopies,
7cfb4d93
RS
8130 vectype, vec_num * j + i);
8131 if (vec_mask)
8132 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8133 vec_mask, gsi);
8134
272c6793
RS
8135 if (i > 0)
8136 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8137 stmt, bump);
272c6793
RS
8138
8139 /* 2. Create the vector-load in the loop. */
8140 switch (alignment_support_scheme)
8141 {
8142 case dr_aligned:
8143 case dr_unaligned_supported:
be1ac4ec 8144 {
644ffefd
MJ
8145 unsigned int align, misalign;
8146
bfaa08b7
RS
8147 if (memory_access_type == VMAT_GATHER_SCATTER)
8148 {
8149 tree scale = size_int (gs_info.scale);
8150 gcall *call;
70088b95 8151 if (loop_masks)
bfaa08b7
RS
8152 call = gimple_build_call_internal
8153 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8154 vec_offset, scale, final_mask);
8155 else
8156 call = gimple_build_call_internal
8157 (IFN_GATHER_LOAD, 3, dataref_ptr,
8158 vec_offset, scale);
8159 gimple_call_set_nothrow (call, true);
8160 new_stmt = call;
8161 data_ref = NULL_TREE;
8162 break;
8163 }
8164
f702e7d4 8165 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8166 if (alignment_support_scheme == dr_aligned)
8167 {
8168 gcc_assert (aligned_access_p (first_dr));
644ffefd 8169 misalign = 0;
272c6793
RS
8170 }
8171 else if (DR_MISALIGNMENT (first_dr) == -1)
8172 {
25f68d90 8173 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8174 misalign = 0;
272c6793
RS
8175 }
8176 else
c3a8f964 8177 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8178 if (dataref_offset == NULL_TREE
8179 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8180 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8181 align, misalign);
c3a8f964 8182
7cfb4d93 8183 if (final_mask)
c3a8f964
RS
8184 {
8185 align = least_bit_hwi (misalign | align);
8186 tree ptr = build_int_cst (ref_type, align);
8187 gcall *call
8188 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8189 dataref_ptr, ptr,
7cfb4d93 8190 final_mask);
c3a8f964
RS
8191 gimple_call_set_nothrow (call, true);
8192 new_stmt = call;
8193 data_ref = NULL_TREE;
8194 }
8195 else
8196 {
8197 data_ref
8198 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8199 dataref_offset
8200 ? dataref_offset
8201 : build_int_cst (ref_type, 0));
8202 if (alignment_support_scheme == dr_aligned)
8203 ;
8204 else if (DR_MISALIGNMENT (first_dr) == -1)
8205 TREE_TYPE (data_ref)
8206 = build_aligned_type (TREE_TYPE (data_ref),
8207 align * BITS_PER_UNIT);
8208 else
8209 TREE_TYPE (data_ref)
8210 = build_aligned_type (TREE_TYPE (data_ref),
8211 TYPE_ALIGN (elem_type));
8212 }
272c6793 8213 break;
be1ac4ec 8214 }
272c6793 8215 case dr_explicit_realign:
267d3070 8216 {
272c6793 8217 tree ptr, bump;
272c6793 8218
d88981fc 8219 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8220
8221 if (compute_in_loop)
8222 msq = vect_setup_realignment (first_stmt, gsi,
8223 &realignment_token,
8224 dr_explicit_realign,
8225 dataref_ptr, NULL);
8226
aed93b23
RB
8227 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8228 ptr = copy_ssa_name (dataref_ptr);
8229 else
8230 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8231 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8232 new_stmt = gimple_build_assign
8233 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8234 build_int_cst
8235 (TREE_TYPE (dataref_ptr),
f702e7d4 8236 -(HOST_WIDE_INT) align));
272c6793
RS
8237 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8238 data_ref
8239 = build2 (MEM_REF, vectype, ptr,
44fc7854 8240 build_int_cst (ref_type, 0));
19986382 8241 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8242 vec_dest = vect_create_destination_var (scalar_dest,
8243 vectype);
8244 new_stmt = gimple_build_assign (vec_dest, data_ref);
8245 new_temp = make_ssa_name (vec_dest, new_stmt);
8246 gimple_assign_set_lhs (new_stmt, new_temp);
8247 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8248 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8249 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8250 msq = new_temp;
8251
d88981fc 8252 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8253 TYPE_SIZE_UNIT (elem_type));
d88981fc 8254 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8255 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8256 new_stmt = gimple_build_assign
8257 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8258 build_int_cst
f702e7d4 8259 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8260 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8261 gimple_assign_set_lhs (new_stmt, ptr);
8262 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8263 data_ref
8264 = build2 (MEM_REF, vectype, ptr,
44fc7854 8265 build_int_cst (ref_type, 0));
272c6793 8266 break;
267d3070 8267 }
272c6793 8268 case dr_explicit_realign_optimized:
f702e7d4
RS
8269 {
8270 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8271 new_temp = copy_ssa_name (dataref_ptr);
8272 else
8273 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8274 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8275 new_stmt = gimple_build_assign
8276 (new_temp, BIT_AND_EXPR, dataref_ptr,
8277 build_int_cst (TREE_TYPE (dataref_ptr),
8278 -(HOST_WIDE_INT) align));
8279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8280 data_ref
8281 = build2 (MEM_REF, vectype, new_temp,
8282 build_int_cst (ref_type, 0));
8283 break;
8284 }
272c6793
RS
8285 default:
8286 gcc_unreachable ();
8287 }
ebfd146a 8288 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8289 /* DATA_REF is null if we've already built the statement. */
8290 if (data_ref)
19986382
RB
8291 {
8292 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8293 new_stmt = gimple_build_assign (vec_dest, data_ref);
8294 }
ebfd146a 8295 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8296 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8298
272c6793
RS
8299 /* 3. Handle explicit realignment if necessary/supported.
8300 Create in loop:
8301 vec_dest = realign_load (msq, lsq, realignment_token) */
8302 if (alignment_support_scheme == dr_explicit_realign_optimized
8303 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8304 {
272c6793
RS
8305 lsq = gimple_assign_lhs (new_stmt);
8306 if (!realignment_token)
8307 realignment_token = dataref_ptr;
8308 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8309 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8310 msq, lsq, realignment_token);
272c6793
RS
8311 new_temp = make_ssa_name (vec_dest, new_stmt);
8312 gimple_assign_set_lhs (new_stmt, new_temp);
8313 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8314
8315 if (alignment_support_scheme == dr_explicit_realign_optimized)
8316 {
8317 gcc_assert (phi);
8318 if (i == vec_num - 1 && j == ncopies - 1)
8319 add_phi_arg (phi, lsq,
8320 loop_latch_edge (containing_loop),
9e227d60 8321 UNKNOWN_LOCATION);
272c6793
RS
8322 msq = lsq;
8323 }
ebfd146a 8324 }
ebfd146a 8325
59fd17e3
RB
8326 /* 4. Handle invariant-load. */
8327 if (inv_p && !bb_vinfo)
8328 {
59fd17e3 8329 gcc_assert (!grouped_load);
d1417442
JJ
8330 /* If we have versioned for aliasing or the loop doesn't
8331 have any data dependencies that would preclude this,
8332 then we are sure this is a loop invariant load and
8333 thus we can insert it on the preheader edge. */
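/* (Illustration only, not from the original sources: for

for (i = 0; i < n; i++)
a[i] = *p;

the scalar load *p is emitted once on the preheader edge and
vect_init_vector then splats the result into a vector.) */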
8334 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8335 && !nested_in_vect_loop
6b916b36 8336 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8337 {
8338 if (dump_enabled_p ())
8339 {
8340 dump_printf_loc (MSG_NOTE, vect_location,
8341 "hoisting out of the vectorized "
8342 "loop: ");
8343 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8344 }
b731b390 8345 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8346 gsi_insert_on_edge_immediate
8347 (loop_preheader_edge (loop),
8348 gimple_build_assign (tem,
8349 unshare_expr
8350 (gimple_assign_rhs1 (stmt))));
8351 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
8352 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8353 set_vinfo_for_stmt (new_stmt,
8354 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
8355 }
8356 else
8357 {
8358 gimple_stmt_iterator gsi2 = *gsi;
8359 gsi_next (&gsi2);
8360 new_temp = vect_init_vector (stmt, scalar_dest,
8361 vectype, &gsi2);
34cd48e5 8362 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8363 }
59fd17e3
RB
8364 }
8365
62da9e14 8366 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8367 {
aec7ae7d
JJ
8368 tree perm_mask = perm_mask_for_reverse (vectype);
8369 new_temp = permute_vec_elements (new_temp, new_temp,
8370 perm_mask, stmt, gsi);
ebfd146a
IR
8371 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8372 }
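/* (Illustration only, not from the original sources: a negative-step
access such as

for (i = n - 1; i >= 0; i--)
sum += a[i];

is loaded starting from the lowest covered address and then reversed
here with a { nunits-1, ..., 1, 0 } permutation.) */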
267d3070 8373
272c6793 8374 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8375 vect_transform_grouped_load (). */
8376 if (grouped_load || slp_perm)
9771b263 8377 dr_chain.quick_push (new_temp);
267d3070 8378
272c6793
RS
8379 /* Store vector loads in the corresponding SLP_NODE. */
8380 if (slp && !slp_perm)
9771b263 8381 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8382
8383 /* With SLP permutation we load the gaps as well; without it
8384 we need to skip the gaps after we manage to fully load
8385 all elements. group_gap_adj is GROUP_SIZE here. */
8386 group_elt += nunits;
d9f21f6a
RS
8387 if (maybe_ne (group_gap_adj, 0U)
8388 && !slp_perm
8389 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8390 {
d9f21f6a
RS
8391 poly_wide_int bump_val
8392 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8393 * group_gap_adj);
8e6cdc90 8394 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8395 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8396 stmt, bump);
8397 group_elt = 0;
8398 }
272c6793 8399 }
9b999e8c
RB
8400 /* Bump the vector pointer to account for a gap or for excess
8401 elements loaded for a permuted SLP load. */
d9f21f6a 8402 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8403 {
d9f21f6a
RS
8404 poly_wide_int bump_val
8405 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8406 * group_gap_adj);
8e6cdc90 8407 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8408 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8409 stmt, bump);
8410 }
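/* (Worked example, not from the original sources: with const_nunits == 4,
group_size == 3 and const_vf == 2, an SLP permutation loads
CEIL (6, 4) == 2 vectors, i.e. 8 elements for the 6 that are needed;
group_gap_adj == -2 and the bump above then moves the pointer back by
two elements.) */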
ebfd146a
IR
8411 }
8412
8413 if (slp && !slp_perm)
8414 continue;
8415
8416 if (slp_perm)
8417 {
29afecdf 8418 unsigned n_perms;
01d8bf07 8419 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8420 slp_node_instance, false,
8421 &n_perms))
ebfd146a 8422 {
9771b263 8423 dr_chain.release ();
ebfd146a
IR
8424 return false;
8425 }
8426 }
8427 else
8428 {
0d0293ac 8429 if (grouped_load)
ebfd146a 8430 {
2de001ee 8431 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8432 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8433 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8434 }
8435 else
8436 {
8437 if (j == 0)
8438 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8439 else
8440 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8441 prev_stmt_info = vinfo_for_stmt (new_stmt);
8442 }
8443 }
9771b263 8444 dr_chain.release ();
ebfd146a
IR
8445 }
8446
ebfd146a
IR
8447 return true;
8448}
8449
8450/* Function vect_is_simple_cond.
b8698a0f 8451
ebfd146a
IR
8452 Input:
8453 VINFO - the vect info of the loop or basic block being vectorized.
8454 COND - Condition that is checked for simple use.
8455
e9e1d143
RG
8456 Output:
8457 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8458 *DTS - The def types for the arguments of the comparison.
e9e1d143 8459
ebfd146a
IR
8460 Returns whether a COND can be vectorized. Checks whether
8461 condition operands are supportable using vect_is_simple_use. */
8462
87aab9b2 8463static bool
4fc5ebf1 8464vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8465 tree *comp_vectype, enum vect_def_type *dts,
8466 tree vectype)
ebfd146a
IR
8467{
8468 tree lhs, rhs;
e9e1d143 8469 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8470
a414c77f
IE
8471 /* Mask case. */
8472 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8473 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
8474 {
8475 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8476 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 8477 &dts[0], comp_vectype)
a414c77f
IE
8478 || !*comp_vectype
8479 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8480 return false;
8481 return true;
8482 }
8483
ebfd146a
IR
8484 if (!COMPARISON_CLASS_P (cond))
8485 return false;
8486
8487 lhs = TREE_OPERAND (cond, 0);
8488 rhs = TREE_OPERAND (cond, 1);
8489
8490 if (TREE_CODE (lhs) == SSA_NAME)
8491 {
355fe088 8492 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 8493 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
8494 return false;
8495 }
4fc5ebf1
JG
8496 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8497 || TREE_CODE (lhs) == FIXED_CST)
8498 dts[0] = vect_constant_def;
8499 else
ebfd146a
IR
8500 return false;
8501
8502 if (TREE_CODE (rhs) == SSA_NAME)
8503 {
355fe088 8504 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 8505 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
8506 return false;
8507 }
4fc5ebf1
JG
8508 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8509 || TREE_CODE (rhs) == FIXED_CST)
8510 dts[1] = vect_constant_def;
8511 else
ebfd146a
IR
8512 return false;
8513
28b33016 8514 if (vectype1 && vectype2
928686b1
RS
8515 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8516 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8517 return false;
8518
e9e1d143 8519 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
8520 /* Invariant comparison. */
8521 if (! *comp_vectype)
8522 {
8523 tree scalar_type = TREE_TYPE (lhs);
8524 /* If we can widen the comparison to match vectype do so. */
8525 if (INTEGRAL_TYPE_P (scalar_type)
8526 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8527 TYPE_SIZE (TREE_TYPE (vectype))))
8528 scalar_type = build_nonstandard_integer_type
8529 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8530 TYPE_UNSIGNED (scalar_type));
8531 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8532 }
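/* (Example, not from the original sources: when comparing two invariant
chars while VECTYPE is V4SI, the comparison is widened to a 32-bit
integer comparison so that the resulting mask vector has the same
number of elements as VECTYPE.) */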
8533
ebfd146a
IR
8534 return true;
8535}
8536
8537/* vectorizable_condition.
8538
b8698a0f
L
8539 Check if STMT is a conditional modify expression that can be vectorized.
8540 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8541 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8542 at GSI.
8543
8544 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8545 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
0ad23163 8546 the else clause if it is 2).
ebfd146a
IR
8547
8548 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8549
4bbe8262 8550bool
355fe088
TS
8551vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8552 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 8553 slp_tree slp_node)
ebfd146a
IR
8554{
8555 tree scalar_dest = NULL_TREE;
8556 tree vec_dest = NULL_TREE;
01216d27
JJ
8557 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8558 tree then_clause, else_clause;
ebfd146a 8559 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8560 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8561 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8562 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8563 tree vec_compare;
ebfd146a
IR
8564 tree new_temp;
8565 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8566 enum vect_def_type dts[4]
8567 = {vect_unknown_def_type, vect_unknown_def_type,
8568 vect_unknown_def_type, vect_unknown_def_type};
8569 int ndts = 4;
f7e531cf 8570 int ncopies;
01216d27 8571 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8572 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8573 int i, j;
8574 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8575 vec<tree> vec_oprnds0 = vNULL;
8576 vec<tree> vec_oprnds1 = vNULL;
8577 vec<tree> vec_oprnds2 = vNULL;
8578 vec<tree> vec_oprnds3 = vNULL;
74946978 8579 tree vec_cmp_type;
a414c77f 8580 bool masked = false;
b8698a0f 8581
f7e531cf
IR
8582 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8583 return false;
8584
bb6c2b68
RS
8585 vect_reduction_type reduction_type
8586 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8587 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8588 {
8589 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8590 return false;
ebfd146a 8591
af29617a
AH
8592 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8593 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8594 && reduc_def))
8595 return false;
ebfd146a 8596
af29617a
AH
8597 /* FORNOW: not yet supported. */
8598 if (STMT_VINFO_LIVE_P (stmt_info))
8599 {
8600 if (dump_enabled_p ())
8601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8602 "value used after loop.\n");
8603 return false;
8604 }
ebfd146a
IR
8605 }
8606
8607 /* Is vectorizable conditional operation? */
8608 if (!is_gimple_assign (stmt))
8609 return false;
8610
8611 code = gimple_assign_rhs_code (stmt);
8612
8613 if (code != COND_EXPR)
8614 return false;
8615
465c8c19 8616 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8617 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8618
fce57248 8619 if (slp_node)
465c8c19
JJ
8620 ncopies = 1;
8621 else
e8f142e2 8622 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8623
8624 gcc_assert (ncopies >= 1);
8625 if (reduc_index && ncopies > 1)
8626 return false; /* FORNOW */
8627
4e71066d
RG
8628 cond_expr = gimple_assign_rhs1 (stmt);
8629 then_clause = gimple_assign_rhs2 (stmt);
8630 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8631
4fc5ebf1 8632 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8da4c8d8 8633 &comp_vectype, &dts[0], vectype)
e9e1d143 8634 || !comp_vectype)
ebfd146a
IR
8635 return false;
8636
81c40241 8637 gimple *def_stmt;
4fc5ebf1 8638 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8639 &vectype1))
8640 return false;
4fc5ebf1 8641 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8642 &vectype2))
ebfd146a 8643 return false;
2947d3b2
IE
8644
8645 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8646 return false;
8647
8648 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8649 return false;
8650
28b33016
IE
8651 masked = !COMPARISON_CLASS_P (cond_expr);
8652 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8653
74946978
MP
8654 if (vec_cmp_type == NULL_TREE)
8655 return false;
784fb9b3 8656
01216d27
JJ
8657 cond_code = TREE_CODE (cond_expr);
8658 if (!masked)
8659 {
8660 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8661 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8662 }
8663
8664 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8665 {
8666 /* Boolean values may have another representation in vectors
8667 and therefore we prefer bit operations over comparison for
8668 them (which also works for scalar masks). We store the opcodes
8669 to use in bitop1 and bitop2. The statement is vectorized as
8670 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2),
8671 depending on the arity of bitop1 and bitop2. */
8672 switch (cond_code)
8673 {
8674 case GT_EXPR:
8675 bitop1 = BIT_NOT_EXPR;
8676 bitop2 = BIT_AND_EXPR;
8677 break;
8678 case GE_EXPR:
8679 bitop1 = BIT_NOT_EXPR;
8680 bitop2 = BIT_IOR_EXPR;
8681 break;
8682 case LT_EXPR:
8683 bitop1 = BIT_NOT_EXPR;
8684 bitop2 = BIT_AND_EXPR;
8685 std::swap (cond_expr0, cond_expr1);
8686 break;
8687 case LE_EXPR:
8688 bitop1 = BIT_NOT_EXPR;
8689 bitop2 = BIT_IOR_EXPR;
8690 std::swap (cond_expr0, cond_expr1);
8691 break;
8692 case NE_EXPR:
8693 bitop1 = BIT_XOR_EXPR;
8694 break;
8695 case EQ_EXPR:
8696 bitop1 = BIT_XOR_EXPR;
8697 bitop2 = BIT_NOT_EXPR;
8698 break;
8699 default:
8700 return false;
8701 }
8702 cond_code = SSA_NAME;
8703 }
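/* (Sanity check, not from the original sources: for one-bit booleans
a, b in {0, 1}, a > b holds only for a == 1 && b == 0, which is
exactly a & ~b -- hence BIT_NOT_EXPR/BIT_AND_EXPR above; likewise
a >= b is a | ~b, a != b is a ^ b, and a == b is ~(a ^ b).) */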
8704
b8698a0f 8705 if (!vec_stmt)
ebfd146a
IR
8706 {
8707 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
01216d27
JJ
8708 if (bitop1 != NOP_EXPR)
8709 {
8710 machine_mode mode = TYPE_MODE (comp_vectype);
8711 optab optab;
8712
8713 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8714 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8715 return false;
8716
8717 if (bitop2 != NOP_EXPR)
8718 {
8719 optab = optab_for_tree_code (bitop2, comp_vectype,
8720 optab_default);
8721 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8722 return false;
8723 }
8724 }
4fc5ebf1
JG
8725 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8726 cond_code))
8727 {
78604de0
RB
8728 if (!slp_node)
8729 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
4fc5ebf1
JG
8730 return true;
8731 }
8732 return false;
ebfd146a
IR
8733 }
8734
f7e531cf
IR
8735 /* Transform. */
8736
8737 if (!slp_node)
8738 {
9771b263
DN
8739 vec_oprnds0.create (1);
8740 vec_oprnds1.create (1);
8741 vec_oprnds2.create (1);
8742 vec_oprnds3.create (1);
f7e531cf 8743 }
ebfd146a
IR
8744
8745 /* Handle def. */
8746 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8747 if (reduction_type != EXTRACT_LAST_REDUCTION)
8748 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8749
8750 /* Handle cond expr. */
a855b1b1
MM
8751 for (j = 0; j < ncopies; j++)
8752 {
bb6c2b68 8753 gimple *new_stmt = NULL;
a855b1b1
MM
8754 if (j == 0)
8755 {
f7e531cf
IR
8756 if (slp_node)
8757 {
00f96dc9
TS
8758 auto_vec<tree, 4> ops;
8759 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8760
a414c77f 8761 if (masked)
01216d27 8762 ops.safe_push (cond_expr);
a414c77f
IE
8763 else
8764 {
01216d27
JJ
8765 ops.safe_push (cond_expr0);
8766 ops.safe_push (cond_expr1);
a414c77f 8767 }
9771b263
DN
8768 ops.safe_push (then_clause);
8769 ops.safe_push (else_clause);
306b0c92 8770 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8771 vec_oprnds3 = vec_defs.pop ();
8772 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8773 if (!masked)
8774 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8775 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8776 }
8777 else
8778 {
355fe088 8779 gimple *gtemp;
a414c77f
IE
8780 if (masked)
8781 {
8782 vec_cond_lhs
8783 = vect_get_vec_def_for_operand (cond_expr, stmt,
8784 comp_vectype);
8785 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8786 &gtemp, &dts[0]);
8787 }
8788 else
8789 {
01216d27
JJ
8790 vec_cond_lhs
8791 = vect_get_vec_def_for_operand (cond_expr0,
8792 stmt, comp_vectype);
8793 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8794
8795 vec_cond_rhs
8796 = vect_get_vec_def_for_operand (cond_expr1,
8797 stmt, comp_vectype);
8798 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8799 }
f7e531cf
IR
8800 if (reduc_index == 1)
8801 vec_then_clause = reduc_def;
8802 else
8803 {
8804 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8805 stmt);
8806 vect_is_simple_use (then_clause, loop_vinfo,
8807 &gtemp, &dts[2]);
f7e531cf
IR
8808 }
8809 if (reduc_index == 2)
8810 vec_else_clause = reduc_def;
8811 else
8812 {
8813 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8814 stmt);
8815 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8816 }
a855b1b1
MM
8817 }
8818 }
8819 else
8820 {
a414c77f
IE
8821 vec_cond_lhs
8822 = vect_get_vec_def_for_stmt_copy (dts[0],
8823 vec_oprnds0.pop ());
8824 if (!masked)
8825 vec_cond_rhs
8826 = vect_get_vec_def_for_stmt_copy (dts[1],
8827 vec_oprnds1.pop ());
8828
a855b1b1 8829 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8830 vec_oprnds2.pop ());
a855b1b1 8831 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8832 vec_oprnds3.pop ());
f7e531cf
IR
8833 }
8834
8835 if (!slp_node)
8836 {
9771b263 8837 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8838 if (!masked)
8839 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8840 vec_oprnds2.quick_push (vec_then_clause);
8841 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8842 }
8843
9dc3f7de 8844 /* Arguments are ready. Create the new vector stmt. */
9771b263 8845 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8846 {
9771b263
DN
8847 vec_then_clause = vec_oprnds2[i];
8848 vec_else_clause = vec_oprnds3[i];
a855b1b1 8849
a414c77f
IE
8850 if (masked)
8851 vec_compare = vec_cond_lhs;
8852 else
8853 {
8854 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8855 if (bitop1 == NOP_EXPR)
8856 vec_compare = build2 (cond_code, vec_cmp_type,
8857 vec_cond_lhs, vec_cond_rhs);
8858 else
8859 {
8860 new_temp = make_ssa_name (vec_cmp_type);
8861 if (bitop1 == BIT_NOT_EXPR)
8862 new_stmt = gimple_build_assign (new_temp, bitop1,
8863 vec_cond_rhs);
8864 else
8865 new_stmt
8866 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8867 vec_cond_rhs);
8868 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8869 if (bitop2 == NOP_EXPR)
8870 vec_compare = new_temp;
8871 else if (bitop2 == BIT_NOT_EXPR)
8872 {
8873 /* Instead of doing ~x ? y : z do x ? z : y. */
8874 vec_compare = new_temp;
8875 std::swap (vec_then_clause, vec_else_clause);
8876 }
8877 else
8878 {
8879 vec_compare = make_ssa_name (vec_cmp_type);
8880 new_stmt
8881 = gimple_build_assign (vec_compare, bitop2,
8882 vec_cond_lhs, new_temp);
8883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8884 }
8885 }
a414c77f 8886 }
bb6c2b68
RS
8887 if (reduction_type == EXTRACT_LAST_REDUCTION)
8888 {
8889 if (!is_gimple_val (vec_compare))
8890 {
8891 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8892 new_stmt = gimple_build_assign (vec_compare_name,
8893 vec_compare);
8894 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8895 vec_compare = vec_compare_name;
8896 }
8897 gcc_assert (reduc_index == 2);
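/* (Note, not from the original sources: FOLD_EXTRACT_LAST (ELSE,
MASK, THEN) yields the element of the THEN vector that corresponds
to the last set bit of MASK, or the scalar ELSE if MASK is all
zeros, which is what a conditional reduction in a fully-masked
loop needs.) */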
8898 new_stmt = gimple_build_call_internal
8899 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8900 vec_then_clause);
8901 gimple_call_set_lhs (new_stmt, scalar_dest);
8902 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8903 if (stmt == gsi_stmt (*gsi))
8904 vect_finish_replace_stmt (stmt, new_stmt);
8905 else
8906 {
8907 /* In this case we're moving the definition to later in the
8908 block. That doesn't matter because the only uses of the
8909 lhs are in phi statements. */
8910 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8911 gsi_remove (&old_gsi, true);
8912 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8913 }
8914 }
8915 else
8916 {
8917 new_temp = make_ssa_name (vec_dest);
8918 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8919 vec_compare, vec_then_clause,
8920 vec_else_clause);
8921 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8922 }
f7e531cf 8923 if (slp_node)
9771b263 8924 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
8925 }
8926
8927 if (slp_node)
8928 continue;
8929
8930 if (j == 0)
8931 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8932 else
8933 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8934
8935 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 8936 }
b8698a0f 8937
9771b263
DN
8938 vec_oprnds0.release ();
8939 vec_oprnds1.release ();
8940 vec_oprnds2.release ();
8941 vec_oprnds3.release ();
f7e531cf 8942
ebfd146a
IR
8943 return true;
8944}
8945
42fd8198
IE
8946/* vectorizable_comparison.
8947
8948 Check if STMT is a comparison expression that can be vectorized.
8949 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8950 comparison, put it in VEC_STMT, and insert it at GSI.
8951
8952 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8953
fce57248 8954static bool
42fd8198
IE
8955vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8956 gimple **vec_stmt, tree reduc_def,
8957 slp_tree slp_node)
8958{
8959 tree lhs, rhs1, rhs2;
8960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8961 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8963 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8964 tree new_temp;
8965 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8966 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 8967 int ndts = 2;
928686b1 8968 poly_uint64 nunits;
42fd8198 8969 int ncopies;
49e76ff1 8970 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
8971 stmt_vec_info prev_stmt_info = NULL;
8972 int i, j;
8973 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8974 vec<tree> vec_oprnds0 = vNULL;
8975 vec<tree> vec_oprnds1 = vNULL;
8976 gimple *def_stmt;
8977 tree mask_type;
8978 tree mask;
8979
c245362b
IE
8980 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8981 return false;
8982
30480bcd 8983 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
8984 return false;
8985
8986 mask_type = vectype;
8987 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8988
fce57248 8989 if (slp_node)
42fd8198
IE
8990 ncopies = 1;
8991 else
e8f142e2 8992 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
8993
8994 gcc_assert (ncopies >= 1);
42fd8198
IE
8995 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8996 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8997 && reduc_def))
8998 return false;
8999
9000 if (STMT_VINFO_LIVE_P (stmt_info))
9001 {
9002 if (dump_enabled_p ())
9003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9004 "value used after loop.\n");
9005 return false;
9006 }
9007
9008 if (!is_gimple_assign (stmt))
9009 return false;
9010
9011 code = gimple_assign_rhs_code (stmt);
9012
9013 if (TREE_CODE_CLASS (code) != tcc_comparison)
9014 return false;
9015
9016 rhs1 = gimple_assign_rhs1 (stmt);
9017 rhs2 = gimple_assign_rhs2 (stmt);
9018
9019 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9020 &dts[0], &vectype1))
9021 return false;
9022
9023 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9024 &dts[1], &vectype2))
9025 return false;
9026
9027 if (vectype1 && vectype2
928686b1
RS
9028 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9029 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9030 return false;
9031
9032 vectype = vectype1 ? vectype1 : vectype2;
9033
9034 /* Invariant comparison. */
9035 if (!vectype)
9036 {
69a9a66f 9037 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9038 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9039 return false;
9040 }
928686b1 9041 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9042 return false;
9043
49e76ff1
IE
9044 /* Can't compare mask and non-mask types. */
9045 if (vectype1 && vectype2
9046 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9047 return false;
9048
9049 /* Boolean values may have another representation in vectors
9050 and therefore we prefer bit operations over comparison for
9051 them (which also works for scalar masks). We store the opcodes
9052 to use in bitop1 and bitop2. The statement is vectorized as
9053 BITOP2 (rhs1 BITOP1 rhs2) or
9054 rhs1 BITOP2 (BITOP1 rhs2),
9055 depending on the arity of bitop1 and bitop2. */
9056 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9057 {
9058 if (code == GT_EXPR)
9059 {
9060 bitop1 = BIT_NOT_EXPR;
9061 bitop2 = BIT_AND_EXPR;
9062 }
9063 else if (code == GE_EXPR)
9064 {
9065 bitop1 = BIT_NOT_EXPR;
9066 bitop2 = BIT_IOR_EXPR;
9067 }
9068 else if (code == LT_EXPR)
9069 {
9070 bitop1 = BIT_NOT_EXPR;
9071 bitop2 = BIT_AND_EXPR;
9072 std::swap (rhs1, rhs2);
264d951a 9073 std::swap (dts[0], dts[1]);
49e76ff1
IE
9074 }
9075 else if (code == LE_EXPR)
9076 {
9077 bitop1 = BIT_NOT_EXPR;
9078 bitop2 = BIT_IOR_EXPR;
9079 std::swap (rhs1, rhs2);
264d951a 9080 std::swap (dts[0], dts[1]);
49e76ff1
IE
9081 }
9082 else
9083 {
9084 bitop1 = BIT_XOR_EXPR;
9085 if (code == EQ_EXPR)
9086 bitop2 = BIT_NOT_EXPR;
9087 }
9088 }
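/* (Note, not from the original sources: LT/LE reuse the GT/GE mappings
with the operands -- and their def types -- swapped, since a < b is
b > a; for booleans in {0, 1} these again reduce to the bit
operations above, e.g. a > b becoming a & ~b.) */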
9089
42fd8198
IE
9090 if (!vec_stmt)
9091 {
9092 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
78604de0
RB
9093 if (!slp_node)
9094 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9095 dts, ndts, NULL, NULL);
49e76ff1 9096 if (bitop1 == NOP_EXPR)
96592eed 9097 return expand_vec_cmp_expr_p (vectype, mask_type, code);
49e76ff1
IE
9098 else
9099 {
9100 machine_mode mode = TYPE_MODE (vectype);
9101 optab optab;
9102
9103 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9104 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9105 return false;
9106
9107 if (bitop2 != NOP_EXPR)
9108 {
9109 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9110 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9111 return false;
9112 }
9113 return true;
9114 }
42fd8198
IE
9115 }
9116
9117 /* Transform. */
9118 if (!slp_node)
9119 {
9120 vec_oprnds0.create (1);
9121 vec_oprnds1.create (1);
9122 }
9123
9124 /* Handle def. */
9125 lhs = gimple_assign_lhs (stmt);
9126 mask = vect_create_destination_var (lhs, mask_type);
9127
9128 /* Handle cmp expr. */
9129 for (j = 0; j < ncopies; j++)
9130 {
9131 gassign *new_stmt = NULL;
9132 if (j == 0)
9133 {
9134 if (slp_node)
9135 {
9136 auto_vec<tree, 2> ops;
9137 auto_vec<vec<tree>, 2> vec_defs;
9138
9139 ops.safe_push (rhs1);
9140 ops.safe_push (rhs2);
306b0c92 9141 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9142 vec_oprnds1 = vec_defs.pop ();
9143 vec_oprnds0 = vec_defs.pop ();
9144 }
9145 else
9146 {
e4af0bc4
IE
9147 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9148 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9149 }
9150 }
9151 else
9152 {
9153 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9154 vec_oprnds0.pop ());
9155 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9156 vec_oprnds1.pop ());
9157 }
9158
9159 if (!slp_node)
9160 {
9161 vec_oprnds0.quick_push (vec_rhs1);
9162 vec_oprnds1.quick_push (vec_rhs2);
9163 }
9164
9165 /* Arguments are ready. Create the new vector stmt. */
9166 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9167 {
9168 vec_rhs2 = vec_oprnds1[i];
9169
9170 new_temp = make_ssa_name (mask);
49e76ff1
IE
9171 if (bitop1 == NOP_EXPR)
9172 {
9173 new_stmt = gimple_build_assign (new_temp, code,
9174 vec_rhs1, vec_rhs2);
9175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9176 }
9177 else
9178 {
9179 if (bitop1 == BIT_NOT_EXPR)
9180 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9181 else
9182 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9183 vec_rhs2);
9184 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9185 if (bitop2 != NOP_EXPR)
9186 {
9187 tree res = make_ssa_name (mask);
9188 if (bitop2 == BIT_NOT_EXPR)
9189 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9190 else
9191 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9192 new_temp);
9193 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9194 }
9195 }
42fd8198
IE
9196 if (slp_node)
9197 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9198 }
9199
9200 if (slp_node)
9201 continue;
9202
9203 if (j == 0)
9204 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9205 else
9206 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9207
9208 prev_stmt_info = vinfo_for_stmt (new_stmt);
9209 }
9210
9211 vec_oprnds0.release ();
9212 vec_oprnds1.release ();
9213
9214 return true;
9215}
ebfd146a 9216
68a0f2ff
RS
9217/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9218 can handle all live statements in the node. Otherwise return true
9219 if STMT is not live or if vectorizable_live_operation can handle it.
9220 GSI and VEC_STMT are as for vectorizable_live_operation. */
9221
9222static bool
9223can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9224 slp_tree slp_node, gimple **vec_stmt)
9225{
9226 if (slp_node)
9227 {
9228 gimple *slp_stmt;
9229 unsigned int i;
9230 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9231 {
9232 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9233 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9234 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9235 vec_stmt))
9236 return false;
9237 }
9238 }
9239 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9240 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9241 return false;
9242
9243 return true;
9244}
9245
8644a673 9246/* Make sure the statement is vectorizable. */
ebfd146a
IR
9247
9248bool
891ad31c
RB
9249vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9250 slp_instance node_instance)
ebfd146a 9251{
8644a673 9252 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 9253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9254 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9255 bool ok;
355fe088 9256 gimple *pattern_stmt;
363477c0 9257 gimple_seq pattern_def_seq;
ebfd146a 9258
73fbfcad 9259 if (dump_enabled_p ())
ebfd146a 9260 {
78c60e3d
SS
9261 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9262 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9263 }
ebfd146a 9264
1825a1f3 9265 if (gimple_has_volatile_ops (stmt))
b8698a0f 9266 {
73fbfcad 9267 if (dump_enabled_p ())
78c60e3d 9268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9269 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9270
9271 return false;
9272 }
b8698a0f
L
9273
9274 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9275 to include:
9276 - the COND_EXPR which is the loop exit condition
9277 - any LABEL_EXPRs in the loop
b8698a0f 9278 - computations that are used only for array indexing or loop control.
8644a673 9279 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9280 instance, therefore, all the statements are relevant.
ebfd146a 9281
d092494c 9282 The pattern statement needs to be analyzed instead of the original statement
83197f37 9283 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9284 statements. In basic blocks we are called from some SLP instance
9285 traversal; there we don't analyze pattern stmts instead of the
9286 original, as the pattern stmts will already be part of an SLP instance. */
83197f37
IR
9287
9288 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9289 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9290 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9291 {
9d5e7640 9292 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9293 && pattern_stmt
9d5e7640
IR
9294 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9295 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9296 {
83197f37 9297 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9298 stmt = pattern_stmt;
9299 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9300 if (dump_enabled_p ())
9d5e7640 9301 {
78c60e3d
SS
9302 dump_printf_loc (MSG_NOTE, vect_location,
9303 "==> examining pattern statement: ");
9304 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9305 }
9306 }
9307 else
9308 {
73fbfcad 9309 if (dump_enabled_p ())
e645e942 9310 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9311
9d5e7640
IR
9312 return true;
9313 }
8644a673 9314 }
83197f37 9315 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9316 && node == NULL
83197f37
IR
9317 && pattern_stmt
9318 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9319 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9320 {
9321 /* Analyze PATTERN_STMT too. */
73fbfcad 9322 if (dump_enabled_p ())
83197f37 9323 {
78c60e3d
SS
9324 dump_printf_loc (MSG_NOTE, vect_location,
9325 "==> examining pattern statement: ");
9326 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9327 }
9328
891ad31c
RB
9329 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9330 node_instance))
83197f37
IR
9331 return false;
9332 }
ebfd146a 9333
1107f3ae 9334 if (is_pattern_stmt_p (stmt_info)
079c527f 9335 && node == NULL
363477c0 9336 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 9337 {
363477c0 9338 gimple_stmt_iterator si;
1107f3ae 9339
363477c0
JJ
9340 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9341 {
355fe088 9342 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
9343 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9344 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9345 {
9346 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 9347 if (dump_enabled_p ())
363477c0 9348 {
78c60e3d
SS
9349 dump_printf_loc (MSG_NOTE, vect_location,
9350 "==> examining pattern def statement: ");
9351 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 9352 }
1107f3ae 9353
363477c0 9354 if (!vect_analyze_stmt (pattern_def_stmt,
891ad31c 9355 need_to_vectorize, node, node_instance))
363477c0
JJ
9356 return false;
9357 }
9358 }
9359 }
1107f3ae 9360
8644a673
IR
9361 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9362 {
9363 case vect_internal_def:
9364 break;
ebfd146a 9365
8644a673 9366 case vect_reduction_def:
7c5222ff 9367 case vect_nested_cycle:
14a61437
RB
9368 gcc_assert (!bb_vinfo
9369 && (relevance == vect_used_in_outer
9370 || relevance == vect_used_in_outer_by_reduction
9371 || relevance == vect_used_by_reduction
b28ead45
AH
9372 || relevance == vect_unused_in_scope
9373 || relevance == vect_used_only_live));
8644a673
IR
9374 break;
9375
9376 case vect_induction_def:
e7baeb39
RB
9377 gcc_assert (!bb_vinfo);
9378 break;
9379
8644a673
IR
9380 case vect_constant_def:
9381 case vect_external_def:
9382 case vect_unknown_def_type:
9383 default:
9384 gcc_unreachable ();
9385 }
ebfd146a 9386
8644a673 9387 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9388 {
8644a673 9389 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9390 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9391 || (is_gimple_call (stmt)
9392 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9393 *need_to_vectorize = true;
ebfd146a
IR
9394 }
9395
b1af7da6
RB
9396 if (PURE_SLP_STMT (stmt_info) && !node)
9397 {
9398 dump_printf_loc (MSG_NOTE, vect_location,
9399 "handled only by SLP analysis\n");
9400 return true;
9401 }
9402
9403 ok = true;
9404 if (!bb_vinfo
9405 && (STMT_VINFO_RELEVANT_P (stmt_info)
9406 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9407 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9408 || vectorizable_conversion (stmt, NULL, NULL, node)
9409 || vectorizable_shift (stmt, NULL, NULL, node)
9410 || vectorizable_operation (stmt, NULL, NULL, node)
9411 || vectorizable_assignment (stmt, NULL, NULL, node)
9412 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9413 || vectorizable_call (stmt, NULL, NULL, node)
9414 || vectorizable_store (stmt, NULL, NULL, node)
891ad31c 9415 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
e7baeb39 9416 || vectorizable_induction (stmt, NULL, NULL, node)
42fd8198
IE
9417 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9418 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
9419 else
9420 {
9421 if (bb_vinfo)
9422 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9423 || vectorizable_conversion (stmt, NULL, NULL, node)
9424 || vectorizable_shift (stmt, NULL, NULL, node)
9425 || vectorizable_operation (stmt, NULL, NULL, node)
9426 || vectorizable_assignment (stmt, NULL, NULL, node)
9427 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9428 || vectorizable_call (stmt, NULL, NULL, node)
9429 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
9430 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9431 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 9432 }
8644a673
IR
9433
9434 if (!ok)
ebfd146a 9435 {
73fbfcad 9436 if (dump_enabled_p ())
8644a673 9437 {
78c60e3d
SS
9438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9439 "not vectorized: relevant stmt not ");
9440 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9441 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9442 }
b8698a0f 9443
ebfd146a
IR
9444 return false;
9445 }
9446
a70d6342
IR
9447 if (bb_vinfo)
9448 return true;
9449
8644a673
IR
9450 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9451 need extra handling, except for vectorizable reductions. */
68a0f2ff
RS
9452 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9453 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
ebfd146a 9454 {
73fbfcad 9455 if (dump_enabled_p ())
8644a673 9456 {
78c60e3d 9457 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9458 "not vectorized: live stmt not supported: ");
78c60e3d 9459 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9460 }
b8698a0f 9461
8644a673 9462 return false;
ebfd146a
IR
9463 }
9464
ebfd146a
IR
9465 return true;
9466}
9467
9468
9469/* Function vect_transform_stmt.
9470
9471 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9472
9473bool
355fe088 9474vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9475 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9476 slp_instance slp_node_instance)
9477{
9478 bool is_store = false;
355fe088 9479 gimple *vec_stmt = NULL;
ebfd146a 9480 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 9481 bool done;
ebfd146a 9482
fce57248 9483 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9484 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9485
e57d9a82
RB
9486 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9487 && nested_in_vect_loop_p
9488 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9489 stmt));
9490
  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  stmt_vec_info group_info
	    = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
	  if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
				     slp_node_instance);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && nested_p
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
	STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
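
/* Illustrative sketch only, not part of the vectorizer: a minimal,
   hypothetical driver showing the calling convention of
   vect_transform_stmt for a non-SLP statement (both SLP arguments are
   NULL).  For a grouped store, IS_STORE only becomes true once the last
   store of the chain has been transformed, at which point the whole
   scalar chain can be removed; this loosely mirrors what the loop
   transform code does.  The helper name is hypothetical.  */

static void
example_transform_one_stmt (gimple *stmt, gimple_stmt_iterator *gsi)
{
  bool grouped_store = false;
  bool is_store = vect_transform_stmt (stmt, gsi, &grouped_store,
				       NULL, NULL);
  /* Once the last store of an interleaving chain has been vectorized,
     all the scalar stores of the chain are dead.  */
  if (is_store && grouped_store)
    vect_remove_stores (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
}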


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
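
/* Illustrative sketch only, not part of the vectorizer: how fresh
   stmt_vec_infos might be attached to every statement of a basic block
   BB for a given VINFO, pairing new_stmt_vec_info with
   set_vinfo_for_stmt.  The helper name is hypothetical.  */

static void
example_attach_stmt_vinfos (basic_block bb, vec_info *vinfo)
{
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      /* Allocate the per-statement bookkeeping and register it in the
	 stmt -> stmt_vec_info mapping.  */
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, vinfo));
    }
}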


/* Create the vector that maps statements to their stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector that maps statements to their stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
    return NULL_TREE;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
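
/* Illustrative sketch only: the SIZE argument selects between "let the
   target pick" (0) and "a vector of exactly this many bytes".  On
   x86_64 with AVX2 enabled, for example, the first query below would
   typically yield a 32-byte vector while the second forces a 16-byte
   (SSE-sized) V4SI.  The helper name is hypothetical.  */

static void
example_vectype_queries (void)
{
  /* Target-preferred vector type for 'int'.  */
  tree v_pref = get_vectype_for_scalar_type_and_size (integer_type_node, 0);
  /* Vector of 'int' occupying exactly 16 bytes, if supported.  */
  tree v_16b = get_vectype_for_scalar_type_and_size (integer_type_node, 16);
  (void) v_pref;
  (void) v_16b;
}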

poly_uint64 current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && known_eq (current_vector_size, 0U))
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}
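
/* Illustrative sketch only: a mask type has one boolean element per
   element of the corresponding data vector type, so their subpart
   counts agree even though the mask may use a much narrower mode
   (e.g. a scalar AVX-512 mask register).  The helper name is
   hypothetical.  */

static void
example_mask_type_property (void)
{
  tree vectype = get_vectype_for_scalar_type (integer_type_node);
  tree masktype = get_mask_type_for_scalar_type (integer_type_node);
  if (vectype && masktype)
    gcc_checking_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
				   TYPE_VECTOR_SUBPARTS (masktype)));
}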

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
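
/* Illustrative sketch only: the canonical calling pattern for
   vect_is_simple_use.  A caller first checks that the operand can be
   classified at all and then branches on the definition type; OP and
   VINFO are assumed to come from the surrounding analysis.  The helper
   name is hypothetical.  */

static bool
example_operand_is_simple (tree op, vec_info *vinfo)
{
  gimple *def_stmt;
  enum vect_def_type dt;

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
    return false;

  /* Constants and externals need no vectorized definition; internal
     defs will have a vector statement created for them.  */
  return (dt == vect_constant_def
	  || dt == vect_external_def
	  || dt == vect_internal_def);
}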

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple *use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
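
/* Illustrative sketch only: querying whether a char -> int widening
   conversion is supported.  For a two-step promotion
   (char -> short -> int) MULTI_STEP_CVT comes back as 1 and
   INTERM_TYPES holds the single intermediate (short) vector type.
   STMT is assumed to be a scalar conversion statement that already has
   a stmt_vec_info; the vectype arguments and the helper name are
   hypothetical.  */

static bool
example_query_widening (gimple *stmt, tree char_vectype, tree int_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  bool ok = supportable_widening_operation (NOP_EXPR, stmt, int_vectype,
					    char_vectype, &code1, &code2,
					    &multi_step_cvt, &interm_types);
  /* On success CODE1/CODE2 are e.g. VEC_UNPACK_LO_EXPR/HI_EXPR and
     MULTI_STEP_CVT counts the intermediate steps.  */
  interm_types.release ();
  return ok;
}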


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
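
/* Illustrative sketch only: building the loop mask for one iteration of
   a fully-masked loop, where lanes [INDEX, NITERS) are active.
   MASK_TYPE, INDEX and NITERS are assumed to come from the caller; the
   helper name is hypothetical.  */

static gimple *
example_build_loop_mask (tree mask_type, tree index, tree niters)
{
  /* MASK[i] will be true iff INDEX + i < NITERS.  */
  tree mask = make_ssa_name (mask_type);
  return vect_gen_while (mask, index, niters);
}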