/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}

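/* Editor's note (an illustrative sketch, not from the original sources):
   a caller that is still gathering costs typically passes a cost vector,
   e.g.

     unsigned est = record_stmt_cost (&body_cost_vec, 1, vector_stmt,
				      stmt_info, 0, vect_body);

   whereas passing a NULL vector hands the cost straight to the target's
   cost model through add_stmt_cost.  Exact call sites vary.  */
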
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

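/* Editor's sketch (assumed usage, not part of the original file): these
   helpers cooperate when a group of vectors travels through an array,
   e.g. for load-lanes style code:

     tree array = create_vector_array (vectype, 4);
     ... fill the array, e.g. through the ref built by create_array_ref ...
     tree v0 = read_vector_array (stmt, gsi, scalar_dest, array, 0);

   write_vector_array is the mirror operation used on the store side.  */
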
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form).  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

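/* Editor's example (not in the original sources): in a statement such as

     a[i_7] = x_5;

   the use of i_7 only feeds the address computation, so the check above
   returns false for it and the definition of i_7 need not be vectorized,
   while the use of x_5 does count as a real (non-indexing) operand.  */
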
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Cost the "broadcast" of a scalar operand into a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

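/* Editor's note (illustrative, not from the original sources): for a
   VMAT_ELEMENTWISE store of a 4-element vector, the code above charges
   ncopies * 4 scalar_store entries plus ncopies * 4 vec_to_scalar entries
   for extracting the elements, whereas a contiguous store is priced through
   vect_get_store_cost below.  */
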
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

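/* Editor's note (illustrative, not from the original sources): for a
   VMAT_ELEMENTWISE load of a 4-element vector the code above charges
   ncopies * 4 scalar_load entries plus one vec_construct per copy to
   assemble the vector, while contiguous accesses are priced through
   vect_get_load_cost below.  */
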
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

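/* Editor's example (a sketch, not from the original sources): for a call
   roughly like

     tree def = vect_init_vector (stmt, build_int_cst (intSI_type_node, 5),
				  vectype, NULL);

   the initialization placed in the loop preheader is approximately

     cst_1 = { 5, 5, 5, 5 };

   and DEF is the SSA name holding that vector.  */
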
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

1403
c83a894c
AH
1404/* Function vect_get_vec_def_for_operand.
1405
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1408
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1411
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 vector invariant. */
1415
1416tree
1417vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1418{
1419 gimple *def_stmt;
1420 enum vect_def_type dt;
1421 bool is_simple_use;
1422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1424
1425 if (dump_enabled_p ())
1426 {
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1430 dump_printf (MSG_NOTE, "\n");
1431 }
1432
1433 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1434 gcc_assert (is_simple_use);
1435 if (def_stmt && dump_enabled_p ())
1436 {
1437 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1439 }
1440
1441 if (dt == vect_constant_def || dt == vect_external_def)
1442 {
1443 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1444 tree vector_type;
1445
1446 if (vectype)
1447 vector_type = vectype;
2568d8a1 1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1450 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1451 else
1452 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1453
1454 gcc_assert (vector_type);
1455 return vect_init_vector (stmt, op, vector_type, NULL);
1456 }
1457 else
1458 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1459}
1460
1461
ebfd146a
IR
1462/* Function vect_get_vec_def_for_stmt_copy
1463
ff802fa1 1464 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1467 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1469 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1470 DT is the type of the vector def VEC_OPRND.
1471
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1475 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1476 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1480 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1483
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1485
ebfd146a
IR
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1489 VS1.3: vx.3 = memref3
ebfd146a
IR
1490
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1495
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
b8698a0f
L
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1500 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1501 returns the vector-def 'vx.0'.
1502
b8698a0f
L
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1507
b8698a0f
L
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1517
1518tree
1519vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1520{
355fe088 1521 gimple *vec_stmt_for_operand;
ebfd146a
IR
1522 stmt_vec_info def_stmt_info;
1523
1524 /* Do nothing; can reuse same def. */
8644a673 1525 if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1526 return vec_oprnd;
1527
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1538}
1539
1540
1541/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1543
c78e3652 1544void
b8698a0f 1545vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
ebfd146a 1548{
9771b263 1549 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1550
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1552 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1553
9771b263 1554 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1555 {
9771b263 1556 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1558 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1559 }
1560}
1561
1562
c78e3652 1563/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1564
c78e3652 1565void
355fe088 1566vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
306b0c92 1569 slp_tree slp_node)
ebfd146a
IR
1570{
1571 if (slp_node)
d092494c
IR
1572 {
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1576
9771b263 1577 ops.quick_push (op0);
d092494c 1578 if (op1)
9771b263 1579 ops.quick_push (op1);
d092494c 1580
306b0c92 1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1582
37b5ec8f 1583 *vec_oprnds0 = vec_defs[0];
d092494c 1584 if (op1)
37b5ec8f 1585 *vec_oprnds1 = vec_defs[1];
d092494c 1586 }
ebfd146a
IR
1587 else
1588 {
1589 tree vec_oprnd;
1590
9771b263 1591 vec_oprnds0->create (1);
81c40241 1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1593 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1594
1595 if (op1)
1596 {
9771b263 1597 vec_oprnds1->create (1);
81c40241 1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1599 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1600 }
1601 }
1602}
1603
1604
1605/* Function vect_finish_stmt_generation.
1606
 1607 Insert vectorized statement VEC_STMT before iterator GSI, give it a
 1608 stmt_vec_info, and copy location and EH information from scalar stmt STMT. */
1608
1609void
355fe088 1610vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1611 gimple_stmt_iterator *gsi)
1612{
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
310213d4 1614 vec_info *vinfo = stmt_info->vinfo;
ebfd146a
IR
1615
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1617
54e8e2c3
RG
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1620 {
355fe088 1621 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1624 {
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1637 {
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1641 }
1642 }
1643 }
ebfd146a
IR
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1645
310213d4 1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
ebfd146a 1647
73fbfcad 1648 if (dump_enabled_p ())
ebfd146a 1649 {
78c60e3d
SS
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1652 }
1653
ad885386 1654 gimple_set_location (vec_stmt, gimple_location (stmt));
8e91d222
JJ
1655
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
ebfd146a
IR
1662}
1663
70439f0d
RS
1664/* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1668
70439f0d
RS
1669static internal_fn
1670vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
ebfd146a 1672{
70439f0d
RS
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1679 {
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1682 {
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1687 return ifn;
1688 }
1689 }
1690 return IFN_LAST;
ebfd146a
IR
1691}
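
/* For example (illustrative case, not taken from the code above): for a call
   to sqrt whose VECTYPE_OUT and VECTYPE_IN are both V2DF, the function above
   resolves the call (via the sqrt fndecl) to IFN_SQRT and returns IFN_SQRT
   provided the target implements the corresponding optab for V2DFmode
   (e.g. sqrtv2df2 on x86); otherwise it returns IFN_LAST.  */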
1692
5ce9450f 1693
355fe088 1694static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1695 gimple_stmt_iterator *);
1696
62da9e14
RS
1697/* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1700
1701static int
1702compare_step_with_zero (gimple *stmt)
1703{
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
62da9e14
RS
1708}
1709
1710/* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1712
1713static tree
1714perm_mask_for_reverse (tree vectype)
1715{
928686b1 1716 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 1717
d980067b
RS
1718 /* The encoding has a single stepped pattern. */
1719 vec_perm_builder sel (nunits, 1, 3);
928686b1 1720 for (int i = 0; i < 3; ++i)
908a1a16 1721 sel.quick_push (nunits - 1 - i);
62da9e14 1722
e3342de4
RS
1723 vec_perm_indices indices (sel, 1, nunits);
1724 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 1725 return NULL_TREE;
e3342de4 1726 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 1727}
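
/* As an illustration, for a hypothetical 4-element vector type the builder
   above pushes the stepped pattern { 3, 2, 1 }, which the encoding extends
   to the full selector { 3, 2, 1, 0 } -- exactly the permutation that
   reverses the vector.  If the target cannot permute that vector type with
   this constant selector, NULL_TREE is returned instead.  */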
5ce9450f 1728
c3a8f964
RS
1729/* STMT is either a masked or unconditional store. Return the value
1730 being stored. */
1731
1732static tree
1733vect_get_store_rhs (gimple *stmt)
1734{
1735 if (gassign *assign = dyn_cast <gassign *> (stmt))
1736 {
1737 gcc_assert (gimple_assign_single_p (assign));
1738 return gimple_assign_rhs1 (assign);
1739 }
1740 if (gcall *call = dyn_cast <gcall *> (stmt))
1741 {
1742 internal_fn ifn = gimple_call_internal_fn (call);
1743 gcc_assert (ifn == IFN_MASK_STORE);
1744 return gimple_call_arg (stmt, 3);
1745 }
1746 gcc_unreachable ();
1747}
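
/* For reference, the two shapes handled above look roughly like this in
   gimple (SSA names purely illustrative; the argument position matches the
   code above):

     *ptr_1 = val_2;                                  gassign, value is rhs1
     .MASK_STORE (ptr_1, align_3, mask_4, val_2);     gcall, value is arg 3  */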
1748
2de001ee
RS
1749/* A subroutine of get_load_store_type, with a subset of the same
1750 arguments. Handle the case where STMT is part of a grouped load
1751 or store.
1752
1753 For stores, the statements in the group are all consecutive
1754 and there is no gap at the end. For loads, the statements in the
1755 group might not be consecutive; there can be gaps between statements
1756 as well as at the end. */
1757
1758static bool
1759get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 1760 bool masked_p, vec_load_store_type vls_type,
2de001ee
RS
1761 vect_memory_access_type *memory_access_type)
1762{
1763 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1764 vec_info *vinfo = stmt_info->vinfo;
1765 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1766 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1767 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 1768 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2de001ee
RS
1769 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1770 bool single_element_p = (stmt == first_stmt
1771 && !GROUP_NEXT_ELEMENT (stmt_info));
1772 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 1773 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1774
1775 /* True if the vectorized statements would access beyond the last
1776 statement in the group. */
1777 bool overrun_p = false;
1778
1779 /* True if we can cope with such overrun by peeling for gaps, so that
1780 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
1781 bool can_overrun_p = (!masked_p
1782 && vls_type == VLS_LOAD
1783 && loop_vinfo
1784 && !loop->inner);
2de001ee
RS
1785
1786 /* There can only be a gap at the end of the group if the stride is
1787 known at compile time. */
1788 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1789
1790 /* Stores can't yet have gaps. */
1791 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1792
1793 if (slp)
1794 {
1795 if (STMT_VINFO_STRIDED_P (stmt_info))
1796 {
1797 /* Try to use consecutive accesses of GROUP_SIZE elements,
1798 separated by the stride, until we have a complete vector.
1799 Fall back to scalar accesses if that isn't possible. */
928686b1 1800 if (multiple_p (nunits, group_size))
2de001ee
RS
1801 *memory_access_type = VMAT_STRIDED_SLP;
1802 else
1803 *memory_access_type = VMAT_ELEMENTWISE;
1804 }
1805 else
1806 {
1807 overrun_p = loop_vinfo && gap != 0;
1808 if (overrun_p && vls_type != VLS_LOAD)
1809 {
1810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1811 "Grouped store with gaps requires"
1812 " non-consecutive accesses\n");
1813 return false;
1814 }
f702e7d4
RS
1815 /* An overrun is fine if the trailing elements are smaller
1816 than the alignment boundary B. Every vector access will
1817 be a multiple of B and so we are guaranteed to access a
1818 non-gap element in the same B-sized block. */
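 /* For example (illustrative numbers): with a known alignment of 16 bytes
 and 4-byte scalar elements, B covers 4 elements, so a gap of up to 3
 trailing elements stays within the last 16-byte block that contains a
 real group member and the overrun cannot fault.  */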
f9ef2c76 1819 if (overrun_p
f702e7d4
RS
1820 && gap < (vect_known_alignment_in_bytes (first_dr)
1821 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1822 overrun_p = false;
2de001ee
RS
1823 if (overrun_p && !can_overrun_p)
1824 {
1825 if (dump_enabled_p ())
1826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1827 "Peeling for outer loop is not supported\n");
1828 return false;
1829 }
1830 *memory_access_type = VMAT_CONTIGUOUS;
1831 }
1832 }
1833 else
1834 {
1835 /* We can always handle this case using elementwise accesses,
1836 but see if something more efficient is available. */
1837 *memory_access_type = VMAT_ELEMENTWISE;
1838
1839 /* If there is a gap at the end of the group then these optimizations
1840 would access excess elements in the last iteration. */
1841 bool would_overrun_p = (gap != 0);
f702e7d4
RS
1842 /* An overrun is fine if the trailing elements are smaller than the
1843 alignment boundary B. Every vector access will be a multiple of B
1844 and so we are guaranteed to access a non-gap element in the
1845 same B-sized block. */
f9ef2c76 1846 if (would_overrun_p
7e11fc7f 1847 && !masked_p
f702e7d4
RS
1848 && gap < (vect_known_alignment_in_bytes (first_dr)
1849 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1850 would_overrun_p = false;
f702e7d4 1851
2de001ee 1852 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
1853 && (can_overrun_p || !would_overrun_p)
1854 && compare_step_with_zero (stmt) > 0)
2de001ee 1855 {
6737facb
RS
1856 /* First cope with the degenerate case of a single-element
1857 vector. */
1858 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
1859 *memory_access_type = VMAT_CONTIGUOUS;
1860
1861 /* Otherwise try using LOAD/STORE_LANES. */
1862 if (*memory_access_type == VMAT_ELEMENTWISE
1863 && (vls_type == VLS_LOAD
7e11fc7f
RS
1864 ? vect_load_lanes_supported (vectype, group_size, masked_p)
1865 : vect_store_lanes_supported (vectype, group_size,
1866 masked_p)))
2de001ee
RS
1867 {
1868 *memory_access_type = VMAT_LOAD_STORE_LANES;
1869 overrun_p = would_overrun_p;
1870 }
1871
 1872 /* If that fails, try using permuting loads/stores. */
1873 if (*memory_access_type == VMAT_ELEMENTWISE
1874 && (vls_type == VLS_LOAD
1875 ? vect_grouped_load_supported (vectype, single_element_p,
1876 group_size)
1877 : vect_grouped_store_supported (vectype, group_size)))
1878 {
1879 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1880 overrun_p = would_overrun_p;
1881 }
1882 }
1883 }
1884
1885 if (vls_type != VLS_LOAD && first_stmt == stmt)
1886 {
1887 /* STMT is the leader of the group. Check the operands of all the
1888 stmts of the group. */
1889 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1890 while (next_stmt)
1891 {
7e11fc7f 1892 tree op = vect_get_store_rhs (next_stmt);
2de001ee
RS
1893 gimple *def_stmt;
1894 enum vect_def_type dt;
1895 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1896 {
1897 if (dump_enabled_p ())
1898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1899 "use not simple.\n");
1900 return false;
1901 }
1902 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1903 }
1904 }
1905
1906 if (overrun_p)
1907 {
1908 gcc_assert (can_overrun_p);
1909 if (dump_enabled_p ())
1910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1911 "Data access with gaps requires scalar "
1912 "epilogue loop\n");
1913 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1914 }
1915
1916 return true;
1917}
1918
62da9e14
RS
1919/* A subroutine of get_load_store_type, with a subset of the same
1920 arguments. Handle the case where STMT is a load or store that
1921 accesses consecutive elements with a negative step. */
1922
1923static vect_memory_access_type
1924get_negative_load_store_type (gimple *stmt, tree vectype,
1925 vec_load_store_type vls_type,
1926 unsigned int ncopies)
1927{
1928 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1929 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1930 dr_alignment_support alignment_support_scheme;
1931
1932 if (ncopies > 1)
1933 {
1934 if (dump_enabled_p ())
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1936 "multiple types with negative step.\n");
1937 return VMAT_ELEMENTWISE;
1938 }
1939
1940 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1941 if (alignment_support_scheme != dr_aligned
1942 && alignment_support_scheme != dr_unaligned_supported)
1943 {
1944 if (dump_enabled_p ())
1945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1946 "negative step but alignment required.\n");
1947 return VMAT_ELEMENTWISE;
1948 }
1949
1950 if (vls_type == VLS_STORE_INVARIANT)
1951 {
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_NOTE, vect_location,
1954 "negative step with invariant source;"
1955 " no permute needed.\n");
1956 return VMAT_CONTIGUOUS_DOWN;
1957 }
1958
1959 if (!perm_mask_for_reverse (vectype))
1960 {
1961 if (dump_enabled_p ())
1962 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1963 "negative step and reversing not supported.\n");
1964 return VMAT_ELEMENTWISE;
1965 }
1966
1967 return VMAT_CONTIGUOUS_REVERSE;
1968}
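
/* As a concrete (illustrative) example of the case above, a scalar loop such
   as

     for (i = n - 1; i >= 0; --i)
       a[i] = b[i];

   accesses both arrays contiguously but with a negative step.  The load from
   B and the store to A both become VMAT_CONTIGUOUS_REVERSE (a contiguous
   vector access combined with the permutation from perm_mask_for_reverse),
   whereas a store of a loop-invariant value would be VMAT_CONTIGUOUS_DOWN
   and need no permutation at all.  */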
1969
2de001ee
RS
1970/* Analyze load or store statement STMT of type VLS_TYPE. Return true
1971 if there is a memory access type that the vectorized form can use,
1972 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1973 or scatters, fill in GS_INFO accordingly.
1974
1975 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 1976 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
1977 VECTYPE is the vector type that the vectorized statements will use.
1978 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
1979
1980static bool
7e11fc7f 1981get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 1982 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
1983 vect_memory_access_type *memory_access_type,
1984 gather_scatter_info *gs_info)
1985{
1986 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1987 vec_info *vinfo = stmt_info->vinfo;
1988 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 1989 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1990 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1991 {
1992 *memory_access_type = VMAT_GATHER_SCATTER;
1993 gimple *def_stmt;
1994 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1995 gcc_unreachable ();
1996 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1997 &gs_info->offset_dt,
1998 &gs_info->offset_vectype))
1999 {
2000 if (dump_enabled_p ())
2001 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2002 "%s index use not simple.\n",
2003 vls_type == VLS_LOAD ? "gather" : "scatter");
2004 return false;
2005 }
2006 }
2007 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2008 {
7e11fc7f 2009 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2de001ee
RS
2010 memory_access_type))
2011 return false;
2012 }
2013 else if (STMT_VINFO_STRIDED_P (stmt_info))
2014 {
2015 gcc_assert (!slp);
2016 *memory_access_type = VMAT_ELEMENTWISE;
2017 }
2018 else
62da9e14
RS
2019 {
2020 int cmp = compare_step_with_zero (stmt);
2021 if (cmp < 0)
2022 *memory_access_type = get_negative_load_store_type
2023 (stmt, vectype, vls_type, ncopies);
2024 else if (cmp == 0)
2025 {
2026 gcc_assert (vls_type == VLS_LOAD);
2027 *memory_access_type = VMAT_INVARIANT;
2028 }
2029 else
2030 *memory_access_type = VMAT_CONTIGUOUS;
2031 }
2de001ee 2032
4d694b27
RS
2033 if ((*memory_access_type == VMAT_ELEMENTWISE
2034 || *memory_access_type == VMAT_STRIDED_SLP)
2035 && !nunits.is_constant ())
2036 {
2037 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2039 "Not using elementwise accesses due to variable "
2040 "vectorization factor.\n");
2041 return false;
2042 }
2043
2de001ee
RS
2044 /* FIXME: At the moment the cost model seems to underestimate the
2045 cost of using elementwise accesses. This check preserves the
2046 traditional behavior until that can be fixed. */
2047 if (*memory_access_type == VMAT_ELEMENTWISE
2048 && !STMT_VINFO_STRIDED_P (stmt_info))
2049 {
2050 if (dump_enabled_p ())
2051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2052 "not falling back to elementwise accesses\n");
2053 return false;
2054 }
2055 return true;
2056}
2057
aaeefd88
RS
2058/* Return true if boolean argument MASK is suitable for vectorizing
2059 conditional load or store STMT. When returning true, store the
2060 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2061
2062static bool
2063vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
2064{
2065 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2066 {
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2069 "mask argument is not a boolean.\n");
2070 return false;
2071 }
2072
2073 if (TREE_CODE (mask) != SSA_NAME)
2074 {
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "mask argument is not an SSA name.\n");
2078 return false;
2079 }
2080
2081 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2082 gimple *def_stmt;
2083 enum vect_def_type dt;
2084 tree mask_vectype;
2085 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
2086 &mask_vectype))
2087 {
2088 if (dump_enabled_p ())
2089 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2090 "mask use not simple.\n");
2091 return false;
2092 }
2093
2094 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2095 if (!mask_vectype)
2096 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2097
2098 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2099 {
2100 if (dump_enabled_p ())
2101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2102 "could not find an appropriate vector mask type.\n");
2103 return false;
2104 }
2105
2106 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2107 TYPE_VECTOR_SUBPARTS (vectype)))
2108 {
2109 if (dump_enabled_p ())
2110 {
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112 "vector mask type ");
2113 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2114 dump_printf (MSG_MISSED_OPTIMIZATION,
2115 " does not match vector data type ");
2116 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2117 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2118 }
2119 return false;
2120 }
2121
2122 *mask_vectype_out = mask_vectype;
2123 return true;
2124}
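
/* For instance (SSA names illustrative), after if-conversion a conditional
   load typically appears as

     _5 = .MASK_LOAD (ptr_1, align_2, cond_3);

   where cond_3 is a boolean SSA name.  The checks above require exactly that
   shape for the mask, plus a mask vector type with the same number of
   elements as the data vector type.  */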
2125
3133c3b6
RS
2126/* Return true if stored value RHS is suitable for vectorizing store
2127 statement STMT. When returning true, store the type of the
2128 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2129 store in *VLS_TYPE_OUT. */
2130
2131static bool
2132vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
2133 vec_load_store_type *vls_type_out)
2134{
2135 /* In the case this is a store from a constant make sure
2136 native_encode_expr can handle it. */
2137 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2138 {
2139 if (dump_enabled_p ())
2140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2141 "cannot encode constant as a byte sequence.\n");
2142 return false;
2143 }
2144
2145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2146 gimple *def_stmt;
2147 enum vect_def_type dt;
2148 tree rhs_vectype;
2149 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
2150 &rhs_vectype))
2151 {
2152 if (dump_enabled_p ())
2153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2154 "use not simple.\n");
2155 return false;
2156 }
2157
2158 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2159 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2160 {
2161 if (dump_enabled_p ())
2162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2163 "incompatible vector types.\n");
2164 return false;
2165 }
2166
2167 *rhs_vectype_out = rhs_vectype;
2168 if (dt == vect_constant_def || dt == vect_external_def)
2169 *vls_type_out = VLS_STORE_INVARIANT;
2170 else
2171 *vls_type_out = VLS_STORE;
2172 return true;
2173}
2174
bc9587eb
RS
2175/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2176 Note that we support masks with floating-point type, in which case the
2177 floats are interpreted as a bitmask. */
2178
2179static tree
2180vect_build_all_ones_mask (gimple *stmt, tree masktype)
2181{
2182 if (TREE_CODE (masktype) == INTEGER_TYPE)
2183 return build_int_cst (masktype, -1);
2184 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2185 {
2186 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2187 mask = build_vector_from_val (masktype, mask);
2188 return vect_init_vector (stmt, mask, masktype, NULL);
2189 }
2190 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2191 {
2192 REAL_VALUE_TYPE r;
2193 long tmp[6];
2194 for (int j = 0; j < 6; ++j)
2195 tmp[j] = -1;
2196 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2197 tree mask = build_real (TREE_TYPE (masktype), r);
2198 mask = build_vector_from_val (masktype, mask);
2199 return vect_init_vector (stmt, mask, masktype, NULL);
2200 }
2201 gcc_unreachable ();
2202}
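
/* For example, for a V8SF MASKTYPE each element is built from the all-ones
   bit pattern reinterpreted as a float (a NaN), which is the "every lane
   selected" encoding that e.g. the x86 AVX gather built-ins expect for
   floating-point masks; the integer cases above simply use -1.
   (Illustrative; the exact mask type depends on the gather decl.)  */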
2203
2204/* Build an all-zero merge value of type VECTYPE while vectorizing
2205 STMT as a gather load. */
2206
2207static tree
2208vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2209{
2210 tree merge;
2211 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2212 merge = build_int_cst (TREE_TYPE (vectype), 0);
2213 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2214 {
2215 REAL_VALUE_TYPE r;
2216 long tmp[6];
2217 for (int j = 0; j < 6; ++j)
2218 tmp[j] = 0;
2219 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2220 merge = build_real (TREE_TYPE (vectype), r);
2221 }
2222 else
2223 gcc_unreachable ();
2224 merge = build_vector_from_val (vectype, merge);
2225 return vect_init_vector (stmt, merge, vectype, NULL);
2226}
2227
c48d2d35
RS
2228/* Build a gather load call while vectorizing STMT. Insert new instructions
2229 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2230 operation. If the load is conditional, MASK is the unvectorized
2231 condition, otherwise MASK is null. */
2232
2233static void
2234vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2235 gimple **vec_stmt, gather_scatter_info *gs_info,
2236 tree mask)
2237{
2238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2239 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2241 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2242 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2243 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2244 edge pe = loop_preheader_edge (loop);
2245 enum { NARROW, NONE, WIDEN } modifier;
2246 poly_uint64 gather_off_nunits
2247 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2248
2249 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2250 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2251 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2252 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2253 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2254 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2255 tree scaletype = TREE_VALUE (arglist);
2256 gcc_checking_assert (types_compatible_p (srctype, rettype)
2257 && (!mask || types_compatible_p (srctype, masktype)));
2258
2259 tree perm_mask = NULL_TREE;
2260 tree mask_perm_mask = NULL_TREE;
2261 if (known_eq (nunits, gather_off_nunits))
2262 modifier = NONE;
2263 else if (known_eq (nunits * 2, gather_off_nunits))
2264 {
2265 modifier = WIDEN;
2266
2267 /* Currently widening gathers and scatters are only supported for
2268 fixed-length vectors. */
2269 int count = gather_off_nunits.to_constant ();
2270 vec_perm_builder sel (count, count, 1);
2271 for (int i = 0; i < count; ++i)
2272 sel.quick_push (i | (count / 2));
2273
2274 vec_perm_indices indices (sel, 1, count);
2275 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2276 indices);
2277 }
2278 else if (known_eq (nunits, gather_off_nunits * 2))
2279 {
2280 modifier = NARROW;
2281
2282 /* Currently narrowing gathers and scatters are only supported for
2283 fixed-length vectors. */
2284 int count = nunits.to_constant ();
2285 vec_perm_builder sel (count, count, 1);
2286 sel.quick_grow (count);
2287 for (int i = 0; i < count; ++i)
2288 sel[i] = i < count / 2 ? i : i + count / 2;
2289 vec_perm_indices indices (sel, 2, count);
2290 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2291
2292 ncopies *= 2;
2293
2294 if (mask)
2295 {
2296 for (int i = 0; i < count; ++i)
2297 sel[i] = i | (count / 2);
2298 indices.new_vector (sel, 2, count);
2299 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2300 }
2301 }
2302 else
2303 gcc_unreachable ();
2304
2305 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2306 vectype);
2307
2308 tree ptr = fold_convert (ptrtype, gs_info->base);
2309 if (!is_gimple_min_invariant (ptr))
2310 {
2311 gimple_seq seq;
2312 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2313 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2314 gcc_assert (!new_bb);
2315 }
2316
2317 tree scale = build_int_cst (scaletype, gs_info->scale);
2318
2319 tree vec_oprnd0 = NULL_TREE;
2320 tree vec_mask = NULL_TREE;
2321 tree src_op = NULL_TREE;
2322 tree mask_op = NULL_TREE;
2323 tree prev_res = NULL_TREE;
2324 stmt_vec_info prev_stmt_info = NULL;
2325
2326 if (!mask)
2327 {
2328 src_op = vect_build_zero_merge_argument (stmt, rettype);
2329 mask_op = vect_build_all_ones_mask (stmt, masktype);
2330 }
2331
2332 for (int j = 0; j < ncopies; ++j)
2333 {
2334 tree op, var;
2335 gimple *new_stmt;
2336 if (modifier == WIDEN && (j & 1))
2337 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2338 perm_mask, stmt, gsi);
2339 else if (j == 0)
2340 op = vec_oprnd0
2341 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2342 else
2343 op = vec_oprnd0
2344 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2345
2346 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2347 {
2348 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2349 TYPE_VECTOR_SUBPARTS (idxtype)));
2350 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2351 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2352 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2354 op = var;
2355 }
2356
2357 if (mask)
2358 {
2359 if (mask_perm_mask && (j & 1))
2360 mask_op = permute_vec_elements (mask_op, mask_op,
2361 mask_perm_mask, stmt, gsi);
2362 else
2363 {
2364 if (j == 0)
2365 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2366 else
2367 {
2368 gimple *def_stmt;
2369 enum vect_def_type dt;
2370 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2371 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2372 }
2373
2374 mask_op = vec_mask;
2375 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2376 {
2377 gcc_assert
2378 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2379 TYPE_VECTOR_SUBPARTS (masktype)));
2380 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2381 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2382 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2383 mask_op);
2384 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2385 mask_op = var;
2386 }
2387 }
2388 src_op = mask_op;
2389 }
2390
2391 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2392 mask_op, scale);
2393
2394 if (!useless_type_conversion_p (vectype, rettype))
2395 {
2396 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2397 TYPE_VECTOR_SUBPARTS (rettype)));
2398 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2399 gimple_call_set_lhs (new_stmt, op);
2400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2401 var = make_ssa_name (vec_dest);
2402 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2403 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2404 }
2405 else
2406 {
2407 var = make_ssa_name (vec_dest, new_stmt);
2408 gimple_call_set_lhs (new_stmt, var);
2409 }
2410
2411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2412
2413 if (modifier == NARROW)
2414 {
2415 if ((j & 1) == 0)
2416 {
2417 prev_res = var;
2418 continue;
2419 }
2420 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2421 new_stmt = SSA_NAME_DEF_STMT (var);
2422 }
2423
2424 if (prev_stmt_info == NULL)
2425 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2426 else
2427 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2428 prev_stmt_info = vinfo_for_stmt (new_stmt);
2429 }
2430}
2431
37b14185
RB
2432/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2433
2434static bool
2435vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2436 gimple **vec_stmt, slp_tree slp_node,
2437 tree vectype_in, enum vect_def_type *dt)
2438{
2439 tree op, vectype;
2440 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2441 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2442 unsigned ncopies;
2443 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2444
2445 op = gimple_call_arg (stmt, 0);
2446 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2447
2448 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2449 return false;
37b14185
RB
2450
2451 /* Multiple types in SLP are handled by creating the appropriate number of
2452 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2453 case of SLP. */
2454 if (slp_node)
2455 ncopies = 1;
2456 else
e8f142e2 2457 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2458
2459 gcc_assert (ncopies >= 1);
2460
2461 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2462 if (! char_vectype)
2463 return false;
2464
928686b1
RS
2465 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2466 return false;
2467
794e3180 2468 unsigned word_bytes = num_bytes / nunits;
908a1a16 2469
d980067b
RS
2470 /* The encoding uses one stepped pattern for each byte in the word. */
2471 vec_perm_builder elts (num_bytes, word_bytes, 3);
2472 for (unsigned i = 0; i < 3; ++i)
37b14185 2473 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2474 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2475
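  /* For example (illustrative): a bswap32 on a V4SI vector works on the
     V16QI view with word_bytes == 4 and num_bytes == 16.  The loop above
     pushes the byte indices of the first three words,
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, giving word_bytes stepped
     patterns that the encoding extends to the full selector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
     i.e. a byte reversal within each 4-byte word.  */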
e3342de4
RS
2476 vec_perm_indices indices (elts, 1, num_bytes);
2477 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
2478 return false;
2479
2480 if (! vec_stmt)
2481 {
2482 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2485 "\n");
2486 if (! PURE_SLP_STMT (stmt_info))
2487 {
2488 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2489 1, vector_stmt, stmt_info, 0, vect_prologue);
2490 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2491 ncopies, vec_perm, stmt_info, 0, vect_body);
2492 }
2493 return true;
2494 }
2495
736d0f28 2496 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
2497
2498 /* Transform. */
2499 vec<tree> vec_oprnds = vNULL;
2500 gimple *new_stmt = NULL;
2501 stmt_vec_info prev_stmt_info = NULL;
2502 for (unsigned j = 0; j < ncopies; j++)
2503 {
2504 /* Handle uses. */
2505 if (j == 0)
306b0c92 2506 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
2507 else
2508 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2509
 2510 /* Arguments are ready. Create the new vector stmt. */
2511 unsigned i;
2512 tree vop;
2513 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2514 {
2515 tree tem = make_ssa_name (char_vectype);
2516 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2517 char_vectype, vop));
2518 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2519 tree tem2 = make_ssa_name (char_vectype);
2520 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2521 tem, tem, bswap_vconst);
2522 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2523 tem = make_ssa_name (vectype);
2524 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2525 vectype, tem2));
2526 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2527 if (slp_node)
2528 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2529 }
2530
2531 if (slp_node)
2532 continue;
2533
2534 if (j == 0)
2535 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2536 else
2537 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2538
2539 prev_stmt_info = vinfo_for_stmt (new_stmt);
2540 }
2541
2542 vec_oprnds.release ();
2543 return true;
2544}
2545
b1b6836e
RS
2546/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2547 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2548 in a single step. On success, store the binary pack code in
2549 *CONVERT_CODE. */
2550
2551static bool
2552simple_integer_narrowing (tree vectype_out, tree vectype_in,
2553 tree_code *convert_code)
2554{
2555 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2556 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2557 return false;
2558
2559 tree_code code;
2560 int multi_step_cvt = 0;
2561 auto_vec <tree, 8> interm_types;
2562 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2563 &code, &multi_step_cvt,
2564 &interm_types)
2565 || multi_step_cvt)
2566 return false;
2567
2568 *convert_code = code;
2569 return true;
2570}
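
/* For example (types for illustration only): a V4SI input can be narrowed to
   a V8HI output in a single step, in which case *CONVERT_CODE is typically
   VEC_PACK_TRUNC_EXPR and the caller emits one pack stmt per pair of input
   vectors; narrowing int elements all the way to QImode elements would need
   more than one step and is rejected here.  */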
5ce9450f 2571
ebfd146a
IR
2572/* Function vectorizable_call.
2573
538dd0b7 2574 Check if GS performs a function call that can be vectorized.
b8698a0f 2575 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2576 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2577 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2578
2579static bool
355fe088 2580vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2581 slp_tree slp_node)
ebfd146a 2582{
538dd0b7 2583 gcall *stmt;
ebfd146a
IR
2584 tree vec_dest;
2585 tree scalar_dest;
2586 tree op, type;
2587 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2588 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 2589 tree vectype_out, vectype_in;
c7bda0f4
RS
2590 poly_uint64 nunits_in;
2591 poly_uint64 nunits_out;
ebfd146a 2592 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2593 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 2594 vec_info *vinfo = stmt_info->vinfo;
81c40241 2595 tree fndecl, new_temp, rhs_type;
355fe088 2596 gimple *def_stmt;
0502fb85
UB
2597 enum vect_def_type dt[3]
2598 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 2599 int ndts = 3;
355fe088 2600 gimple *new_stmt = NULL;
ebfd146a 2601 int ncopies, j;
6e1aa848 2602 vec<tree> vargs = vNULL;
ebfd146a
IR
2603 enum { NARROW, NONE, WIDEN } modifier;
2604 size_t i, nargs;
9d5e7640 2605 tree lhs;
ebfd146a 2606
190c2236 2607 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2608 return false;
2609
66c16fd9
RB
2610 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2611 && ! vec_stmt)
ebfd146a
IR
2612 return false;
2613
538dd0b7
DM
2614 /* Is GS a vectorizable call? */
2615 stmt = dyn_cast <gcall *> (gs);
2616 if (!stmt)
ebfd146a
IR
2617 return false;
2618
5ce9450f
JJ
2619 if (gimple_call_internal_p (stmt)
2620 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2621 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
c3a8f964
RS
2622 /* Handled by vectorizable_load and vectorizable_store. */
2623 return false;
5ce9450f 2624
0136f8f0
AH
2625 if (gimple_call_lhs (stmt) == NULL_TREE
2626 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2627 return false;
2628
0136f8f0 2629 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2630
b690cc0f
RG
2631 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2632
ebfd146a
IR
2633 /* Process function arguments. */
2634 rhs_type = NULL_TREE;
b690cc0f 2635 vectype_in = NULL_TREE;
ebfd146a
IR
2636 nargs = gimple_call_num_args (stmt);
2637
1b1562a5
MM
 2638 /* Bail out if the function has more than three arguments; we do not have
 2639 interesting builtin functions to vectorize with more than two arguments,
 2640 except for fma. A call with no arguments is not interesting either. */
2641 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2642 return false;
2643
74bf76ed
JJ
2644 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2645 if (gimple_call_internal_p (stmt)
2646 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2647 {
2648 nargs = 0;
2649 rhs_type = unsigned_type_node;
2650 }
2651
ebfd146a
IR
2652 for (i = 0; i < nargs; i++)
2653 {
b690cc0f
RG
2654 tree opvectype;
2655
ebfd146a
IR
2656 op = gimple_call_arg (stmt, i);
2657
2658 /* We can only handle calls with arguments of the same type. */
2659 if (rhs_type
8533c9d8 2660 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2661 {
73fbfcad 2662 if (dump_enabled_p ())
78c60e3d 2663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2664 "argument types differ.\n");
ebfd146a
IR
2665 return false;
2666 }
b690cc0f
RG
2667 if (!rhs_type)
2668 rhs_type = TREE_TYPE (op);
ebfd146a 2669
81c40241 2670 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 2671 {
73fbfcad 2672 if (dump_enabled_p ())
78c60e3d 2673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2674 "use not simple.\n");
ebfd146a
IR
2675 return false;
2676 }
ebfd146a 2677
b690cc0f
RG
2678 if (!vectype_in)
2679 vectype_in = opvectype;
2680 else if (opvectype
2681 && opvectype != vectype_in)
2682 {
73fbfcad 2683 if (dump_enabled_p ())
78c60e3d 2684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2685 "argument vector types differ.\n");
b690cc0f
RG
2686 return false;
2687 }
2688 }
2689 /* If all arguments are external or constant defs use a vector type with
2690 the same size as the output vector type. */
ebfd146a 2691 if (!vectype_in)
b690cc0f 2692 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2693 if (vec_stmt)
2694 gcc_assert (vectype_in);
2695 if (!vectype_in)
2696 {
73fbfcad 2697 if (dump_enabled_p ())
7d8930a0 2698 {
78c60e3d
SS
2699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2700 "no vectype for scalar type ");
2701 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2702 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2703 }
2704
2705 return false;
2706 }
ebfd146a
IR
2707
2708 /* FORNOW */
b690cc0f
RG
2709 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2710 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 2711 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 2712 modifier = NARROW;
c7bda0f4 2713 else if (known_eq (nunits_out, nunits_in))
ebfd146a 2714 modifier = NONE;
c7bda0f4 2715 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
2716 modifier = WIDEN;
2717 else
2718 return false;
2719
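  /* For instance (illustrative types): with a V4SI input and a V8HI output,
     nunits_in * 2 == nunits_out and the call is NARROW -- two vector results
     of the input type are packed into each output vector.  Equal element
     counts give NONE, and a V8HI input with a V4SI output gives WIDEN.  */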
70439f0d
RS
2720 /* We only handle functions that do not read or clobber memory. */
2721 if (gimple_vuse (stmt))
2722 {
2723 if (dump_enabled_p ())
2724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2725 "function reads from or writes to memory.\n");
2726 return false;
2727 }
2728
ebfd146a
IR
2729 /* For now, we only vectorize functions if a target specific builtin
2730 is available. TODO -- in some cases, it might be profitable to
2731 insert the calls for pieces of the vector, in order to be able
2732 to vectorize other operations in the loop. */
70439f0d
RS
2733 fndecl = NULL_TREE;
2734 internal_fn ifn = IFN_LAST;
2735 combined_fn cfn = gimple_call_combined_fn (stmt);
2736 tree callee = gimple_call_fndecl (stmt);
2737
2738 /* First try using an internal function. */
b1b6836e
RS
2739 tree_code convert_code = ERROR_MARK;
2740 if (cfn != CFN_LAST
2741 && (modifier == NONE
2742 || (modifier == NARROW
2743 && simple_integer_narrowing (vectype_out, vectype_in,
2744 &convert_code))))
70439f0d
RS
2745 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2746 vectype_in);
2747
2748 /* If that fails, try asking for a target-specific built-in function. */
2749 if (ifn == IFN_LAST)
2750 {
2751 if (cfn != CFN_LAST)
2752 fndecl = targetm.vectorize.builtin_vectorized_function
2753 (cfn, vectype_out, vectype_in);
2754 else
2755 fndecl = targetm.vectorize.builtin_md_vectorized_function
2756 (callee, vectype_out, vectype_in);
2757 }
2758
2759 if (ifn == IFN_LAST && !fndecl)
ebfd146a 2760 {
70439f0d 2761 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
2762 && !slp_node
2763 && loop_vinfo
2764 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2765 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2766 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2767 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2768 {
2769 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2770 { 0, 1, 2, ... vf - 1 } vector. */
2771 gcc_assert (nargs == 0);
2772 }
37b14185
RB
2773 else if (modifier == NONE
2774 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2775 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2776 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2777 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2778 vectype_in, dt);
74bf76ed
JJ
2779 else
2780 {
2781 if (dump_enabled_p ())
2782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2783 "function is not vectorizable.\n");
74bf76ed
JJ
2784 return false;
2785 }
ebfd146a
IR
2786 }
2787
fce57248 2788 if (slp_node)
190c2236 2789 ncopies = 1;
b1b6836e 2790 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 2791 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 2792 else
e8f142e2 2793 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
2794
2795 /* Sanity check: make sure that at least one copy of the vectorized stmt
2796 needs to be generated. */
2797 gcc_assert (ncopies >= 1);
2798
2799 if (!vec_stmt) /* transformation not required. */
2800 {
2801 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2802 if (dump_enabled_p ())
e645e942
TJ
2803 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2804 "\n");
4fc5ebf1 2805 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
b1b6836e
RS
2806 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2807 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2808 vec_promote_demote, stmt_info, 0, vect_body);
2809
ebfd146a
IR
2810 return true;
2811 }
2812
67b8dbac 2813 /* Transform. */
ebfd146a 2814
73fbfcad 2815 if (dump_enabled_p ())
e645e942 2816 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2817
2818 /* Handle def. */
2819 scalar_dest = gimple_call_lhs (stmt);
2820 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2821
2822 prev_stmt_info = NULL;
b1b6836e 2823 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 2824 {
b1b6836e 2825 tree prev_res = NULL_TREE;
ebfd146a
IR
2826 for (j = 0; j < ncopies; ++j)
2827 {
2828 /* Build argument list for the vectorized call. */
2829 if (j == 0)
9771b263 2830 vargs.create (nargs);
ebfd146a 2831 else
9771b263 2832 vargs.truncate (0);
ebfd146a 2833
190c2236
JJ
2834 if (slp_node)
2835 {
ef062b13 2836 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2837 vec<tree> vec_oprnds0;
190c2236
JJ
2838
2839 for (i = 0; i < nargs; i++)
9771b263 2840 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 2841 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 2842 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2843
2844 /* Arguments are ready. Create the new vector stmt. */
9771b263 2845 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2846 {
2847 size_t k;
2848 for (k = 0; k < nargs; k++)
2849 {
37b5ec8f 2850 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2851 vargs[k] = vec_oprndsk[i];
190c2236 2852 }
b1b6836e
RS
2853 if (modifier == NARROW)
2854 {
2855 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2856 gcall *call
2857 = gimple_build_call_internal_vec (ifn, vargs);
2858 gimple_call_set_lhs (call, half_res);
2859 gimple_call_set_nothrow (call, true);
2860 new_stmt = call;
b1b6836e
RS
2861 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2862 if ((i & 1) == 0)
2863 {
2864 prev_res = half_res;
2865 continue;
2866 }
2867 new_temp = make_ssa_name (vec_dest);
2868 new_stmt = gimple_build_assign (new_temp, convert_code,
2869 prev_res, half_res);
2870 }
70439f0d 2871 else
b1b6836e 2872 {
a844293d 2873 gcall *call;
b1b6836e 2874 if (ifn != IFN_LAST)
a844293d 2875 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 2876 else
a844293d
RS
2877 call = gimple_build_call_vec (fndecl, vargs);
2878 new_temp = make_ssa_name (vec_dest, call);
2879 gimple_call_set_lhs (call, new_temp);
2880 gimple_call_set_nothrow (call, true);
2881 new_stmt = call;
b1b6836e 2882 }
190c2236 2883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2884 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2885 }
2886
2887 for (i = 0; i < nargs; i++)
2888 {
37b5ec8f 2889 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2890 vec_oprndsi.release ();
190c2236 2891 }
190c2236
JJ
2892 continue;
2893 }
2894
ebfd146a
IR
2895 for (i = 0; i < nargs; i++)
2896 {
2897 op = gimple_call_arg (stmt, i);
2898 if (j == 0)
2899 vec_oprnd0
81c40241 2900 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 2901 else
63827fb8
IR
2902 {
2903 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2904 vec_oprnd0
2905 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2906 }
ebfd146a 2907
9771b263 2908 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
2909 }
2910
74bf76ed
JJ
2911 if (gimple_call_internal_p (stmt)
2912 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2913 {
c7bda0f4 2914 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 2915 tree new_var
0e22bb5a 2916 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 2917 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 2918 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 2919 new_temp = make_ssa_name (vec_dest);
0e22bb5a 2920 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 2921 }
b1b6836e
RS
2922 else if (modifier == NARROW)
2923 {
2924 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2925 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2926 gimple_call_set_lhs (call, half_res);
2927 gimple_call_set_nothrow (call, true);
2928 new_stmt = call;
b1b6836e
RS
2929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2930 if ((j & 1) == 0)
2931 {
2932 prev_res = half_res;
2933 continue;
2934 }
2935 new_temp = make_ssa_name (vec_dest);
2936 new_stmt = gimple_build_assign (new_temp, convert_code,
2937 prev_res, half_res);
2938 }
74bf76ed
JJ
2939 else
2940 {
a844293d 2941 gcall *call;
70439f0d 2942 if (ifn != IFN_LAST)
a844293d 2943 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 2944 else
a844293d 2945 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 2946 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
2947 gimple_call_set_lhs (call, new_temp);
2948 gimple_call_set_nothrow (call, true);
2949 new_stmt = call;
74bf76ed 2950 }
ebfd146a
IR
2951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2952
b1b6836e 2953 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
2954 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2955 else
2956 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2957
2958 prev_stmt_info = vinfo_for_stmt (new_stmt);
2959 }
b1b6836e
RS
2960 }
2961 else if (modifier == NARROW)
2962 {
ebfd146a
IR
2963 for (j = 0; j < ncopies; ++j)
2964 {
2965 /* Build argument list for the vectorized call. */
2966 if (j == 0)
9771b263 2967 vargs.create (nargs * 2);
ebfd146a 2968 else
9771b263 2969 vargs.truncate (0);
ebfd146a 2970
190c2236
JJ
2971 if (slp_node)
2972 {
ef062b13 2973 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2974 vec<tree> vec_oprnds0;
190c2236
JJ
2975
2976 for (i = 0; i < nargs; i++)
9771b263 2977 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 2978 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 2979 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2980
2981 /* Arguments are ready. Create the new vector stmt. */
9771b263 2982 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
2983 {
2984 size_t k;
9771b263 2985 vargs.truncate (0);
190c2236
JJ
2986 for (k = 0; k < nargs; k++)
2987 {
37b5ec8f 2988 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
2989 vargs.quick_push (vec_oprndsk[i]);
2990 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 2991 }
a844293d 2992 gcall *call;
70439f0d 2993 if (ifn != IFN_LAST)
a844293d 2994 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 2995 else
a844293d
RS
2996 call = gimple_build_call_vec (fndecl, vargs);
2997 new_temp = make_ssa_name (vec_dest, call);
2998 gimple_call_set_lhs (call, new_temp);
2999 gimple_call_set_nothrow (call, true);
3000 new_stmt = call;
190c2236 3001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3002 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3003 }
3004
3005 for (i = 0; i < nargs; i++)
3006 {
37b5ec8f 3007 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3008 vec_oprndsi.release ();
190c2236 3009 }
190c2236
JJ
3010 continue;
3011 }
3012
ebfd146a
IR
3013 for (i = 0; i < nargs; i++)
3014 {
3015 op = gimple_call_arg (stmt, i);
3016 if (j == 0)
3017 {
3018 vec_oprnd0
81c40241 3019 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3020 vec_oprnd1
63827fb8 3021 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3022 }
3023 else
3024 {
336ecb65 3025 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3026 vec_oprnd0
63827fb8 3027 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3028 vec_oprnd1
63827fb8 3029 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3030 }
3031
9771b263
DN
3032 vargs.quick_push (vec_oprnd0);
3033 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3034 }
3035
b1b6836e 3036 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3037 new_temp = make_ssa_name (vec_dest, new_stmt);
3038 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3039 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3040
3041 if (j == 0)
3042 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3043 else
3044 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3045
3046 prev_stmt_info = vinfo_for_stmt (new_stmt);
3047 }
3048
3049 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3050 }
b1b6836e
RS
3051 else
3052 /* No current target implements this case. */
3053 return false;
ebfd146a 3054
9771b263 3055 vargs.release ();
ebfd146a 3056
ebfd146a
IR
 3057 /* The call in STMT might prevent it from being removed in DCE.
 3058 We however cannot remove it here, due to the way the SSA name
 3059 it defines is mapped to the new definition. So just replace the
 3060 rhs of the statement with something harmless. */
3061
dd34c087
JJ
3062 if (slp_node)
3063 return true;
3064
ebfd146a 3065 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3066 if (is_pattern_stmt_p (stmt_info))
3067 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3068 else
3069 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3070
9d5e7640 3071 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3072 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3073 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3074 STMT_VINFO_STMT (stmt_info) = new_stmt;
3075 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3076
3077 return true;
3078}
3079
3080
0136f8f0
AH
3081struct simd_call_arg_info
3082{
3083 tree vectype;
3084 tree op;
0136f8f0 3085 HOST_WIDE_INT linear_step;
34e82342 3086 enum vect_def_type dt;
0136f8f0 3087 unsigned int align;
17b658af 3088 bool simd_lane_linear;
0136f8f0
AH
3089};
3090
17b658af
JJ
3091/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3092 is linear within simd lane (but not within whole loop), note it in
3093 *ARGINFO. */
3094
3095static void
3096vect_simd_lane_linear (tree op, struct loop *loop,
3097 struct simd_call_arg_info *arginfo)
3098{
355fe088 3099 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3100
3101 if (!is_gimple_assign (def_stmt)
3102 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3103 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3104 return;
3105
3106 tree base = gimple_assign_rhs1 (def_stmt);
3107 HOST_WIDE_INT linear_step = 0;
3108 tree v = gimple_assign_rhs2 (def_stmt);
3109 while (TREE_CODE (v) == SSA_NAME)
3110 {
3111 tree t;
3112 def_stmt = SSA_NAME_DEF_STMT (v);
3113 if (is_gimple_assign (def_stmt))
3114 switch (gimple_assign_rhs_code (def_stmt))
3115 {
3116 case PLUS_EXPR:
3117 t = gimple_assign_rhs2 (def_stmt);
3118 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3119 return;
3120 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3121 v = gimple_assign_rhs1 (def_stmt);
3122 continue;
3123 case MULT_EXPR:
3124 t = gimple_assign_rhs2 (def_stmt);
3125 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3126 return;
3127 linear_step = tree_to_shwi (t);
3128 v = gimple_assign_rhs1 (def_stmt);
3129 continue;
3130 CASE_CONVERT:
3131 t = gimple_assign_rhs1 (def_stmt);
3132 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3133 || (TYPE_PRECISION (TREE_TYPE (v))
3134 < TYPE_PRECISION (TREE_TYPE (t))))
3135 return;
3136 if (!linear_step)
3137 linear_step = 1;
3138 v = t;
3139 continue;
3140 default:
3141 return;
3142 }
8e4284d0 3143 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3144 && loop->simduid
3145 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3146 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3147 == loop->simduid))
3148 {
3149 if (!linear_step)
3150 linear_step = 1;
3151 arginfo->linear_step = linear_step;
3152 arginfo->op = base;
3153 arginfo->simd_lane_linear = true;
3154 return;
3155 }
3156 }
3157}
3158
cf1b2ba4
RS
3159/* Return the number of elements in vector type VECTYPE, which is associated
3160 with a SIMD clone. At present these vectors always have a constant
3161 length. */
3162
3163static unsigned HOST_WIDE_INT
3164simd_clone_subparts (tree vectype)
3165{
928686b1 3166 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3167}
3168
0136f8f0
AH
3169/* Function vectorizable_simd_clone_call.
3170
3171 Check if STMT performs a function call that can be vectorized
3172 by calling a simd clone of the function.
3173 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3174 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3175 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3176
3177static bool
355fe088
TS
3178vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3179 gimple **vec_stmt, slp_tree slp_node)
0136f8f0
AH
3180{
3181 tree vec_dest;
3182 tree scalar_dest;
3183 tree op, type;
3184 tree vec_oprnd0 = NULL_TREE;
3185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3186 tree vectype;
3187 unsigned int nunits;
3188 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3189 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3190 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3191 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3192 tree fndecl, new_temp;
355fe088
TS
3193 gimple *def_stmt;
3194 gimple *new_stmt = NULL;
0136f8f0 3195 int ncopies, j;
00426f9a 3196 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3197 vec<tree> vargs = vNULL;
3198 size_t i, nargs;
3199 tree lhs, rtype, ratype;
e7a74006 3200 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3201
3202 /* Is STMT a vectorizable call? */
3203 if (!is_gimple_call (stmt))
3204 return false;
3205
3206 fndecl = gimple_call_fndecl (stmt);
3207 if (fndecl == NULL_TREE)
3208 return false;
3209
d52f5295 3210 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3211 if (node == NULL || node->simd_clones == NULL)
3212 return false;
3213
3214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3215 return false;
3216
66c16fd9
RB
3217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3218 && ! vec_stmt)
0136f8f0
AH
3219 return false;
3220
3221 if (gimple_call_lhs (stmt)
3222 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3223 return false;
3224
3225 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3226
3227 vectype = STMT_VINFO_VECTYPE (stmt_info);
3228
3229 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3230 return false;
3231
3232 /* FORNOW */
fce57248 3233 if (slp_node)
0136f8f0
AH
3234 return false;
3235
3236 /* Process function arguments. */
3237 nargs = gimple_call_num_args (stmt);
3238
3239 /* Bail out if the function has zero arguments. */
3240 if (nargs == 0)
3241 return false;
3242
00426f9a 3243 arginfo.reserve (nargs, true);
0136f8f0
AH
3244
3245 for (i = 0; i < nargs; i++)
3246 {
3247 simd_call_arg_info thisarginfo;
3248 affine_iv iv;
3249
3250 thisarginfo.linear_step = 0;
3251 thisarginfo.align = 0;
3252 thisarginfo.op = NULL_TREE;
17b658af 3253 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3254
3255 op = gimple_call_arg (stmt, i);
81c40241
RB
3256 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3257 &thisarginfo.vectype)
0136f8f0
AH
3258 || thisarginfo.dt == vect_uninitialized_def)
3259 {
3260 if (dump_enabled_p ())
3261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3262 "use not simple.\n");
0136f8f0
AH
3263 return false;
3264 }
3265
3266 if (thisarginfo.dt == vect_constant_def
3267 || thisarginfo.dt == vect_external_def)
3268 gcc_assert (thisarginfo.vectype == NULL_TREE);
3269 else
3270 gcc_assert (thisarginfo.vectype != NULL_TREE);
3271
6c9e85fb
JJ
3272	  /* For linear arguments, the analysis phase should have saved
3273 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3274 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3275 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3276 {
3277 gcc_assert (vec_stmt);
3278 thisarginfo.linear_step
17b658af 3279 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3280 thisarginfo.op
17b658af
JJ
3281 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3282 thisarginfo.simd_lane_linear
3283 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3284 == boolean_true_node);
6c9e85fb
JJ
3285 /* If loop has been peeled for alignment, we need to adjust it. */
3286 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3287 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3288 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3289 {
3290 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3291 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3292 tree opt = TREE_TYPE (thisarginfo.op);
3293 bias = fold_convert (TREE_TYPE (step), bias);
3294 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3295 thisarginfo.op
3296 = fold_build2 (POINTER_TYPE_P (opt)
3297 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3298 thisarginfo.op, bias);
3299 }
3300 }
3301 else if (!vec_stmt
3302 && thisarginfo.dt != vect_constant_def
3303 && thisarginfo.dt != vect_external_def
3304 && loop_vinfo
3305 && TREE_CODE (op) == SSA_NAME
3306 && simple_iv (loop, loop_containing_stmt (stmt), op,
3307 &iv, false)
3308 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3309 {
3310 thisarginfo.linear_step = tree_to_shwi (iv.step);
3311 thisarginfo.op = iv.base;
3312 }
3313 else if ((thisarginfo.dt == vect_constant_def
3314 || thisarginfo.dt == vect_external_def)
3315 && POINTER_TYPE_P (TREE_TYPE (op)))
3316 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3317 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3318 linear too. */
3319 if (POINTER_TYPE_P (TREE_TYPE (op))
3320 && !thisarginfo.linear_step
3321 && !vec_stmt
3322 && thisarginfo.dt != vect_constant_def
3323 && thisarginfo.dt != vect_external_def
3324 && loop_vinfo
3325 && !slp_node
3326 && TREE_CODE (op) == SSA_NAME)
3327 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3328
3329 arginfo.quick_push (thisarginfo);
3330 }
3331
d9f21f6a
RS
3332 unsigned HOST_WIDE_INT vf;
3333 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3334 {
3335 if (dump_enabled_p ())
3336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3337 "not considering SIMD clones; not yet supported"
3338 " for variable-width vectors.\n");
3339      return false;
3340 }
3341
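  /* Editorial note (added commentary, not from the original sources): the
     loop below picks the "best" simd clone for this call.  Each candidate
     gets a badness score: a simdlen smaller than the vectorization factor is
     penalized (more calls would be needed), inbranch (masked) clones are
     penalized, targetm.simd_clone.usable adds a target-specific penalty, and
     vector arguments that would have to be built up from a scalar (constant,
     invariant or linear operand) add a little more.  Candidates that cannot
     be used at all are skipped by setting i to -1.  The clone with the
     lowest score wins.  */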
0136f8f0
AH
3342 unsigned int badness = 0;
3343 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3344 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3345 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3346 else
3347 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3348 n = n->simdclone->next_clone)
3349 {
3350 unsigned int this_badness = 0;
d9f21f6a 3351 if (n->simdclone->simdlen > vf
0136f8f0
AH
3352 || n->simdclone->nargs != nargs)
3353 continue;
d9f21f6a
RS
3354 if (n->simdclone->simdlen < vf)
3355 this_badness += (exact_log2 (vf)
0136f8f0
AH
3356 - exact_log2 (n->simdclone->simdlen)) * 1024;
3357 if (n->simdclone->inbranch)
3358 this_badness += 2048;
3359 int target_badness = targetm.simd_clone.usable (n);
3360 if (target_badness < 0)
3361 continue;
3362 this_badness += target_badness * 512;
3363 /* FORNOW: Have to add code to add the mask argument. */
3364 if (n->simdclone->inbranch)
3365 continue;
3366 for (i = 0; i < nargs; i++)
3367 {
3368 switch (n->simdclone->args[i].arg_type)
3369 {
3370 case SIMD_CLONE_ARG_TYPE_VECTOR:
3371 if (!useless_type_conversion_p
3372 (n->simdclone->args[i].orig_type,
3373 TREE_TYPE (gimple_call_arg (stmt, i))))
3374 i = -1;
3375 else if (arginfo[i].dt == vect_constant_def
3376 || arginfo[i].dt == vect_external_def
3377 || arginfo[i].linear_step)
3378 this_badness += 64;
3379 break;
3380 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3381 if (arginfo[i].dt != vect_constant_def
3382 && arginfo[i].dt != vect_external_def)
3383 i = -1;
3384 break;
3385 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3386 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3387 if (arginfo[i].dt == vect_constant_def
3388 || arginfo[i].dt == vect_external_def
3389 || (arginfo[i].linear_step
3390 != n->simdclone->args[i].linear_step))
3391 i = -1;
3392 break;
3393 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3394 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3395 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3396 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3397 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3398 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3399 /* FORNOW */
3400 i = -1;
3401 break;
3402 case SIMD_CLONE_ARG_TYPE_MASK:
3403 gcc_unreachable ();
3404 }
3405 if (i == (size_t) -1)
3406 break;
3407 if (n->simdclone->args[i].alignment > arginfo[i].align)
3408 {
3409 i = -1;
3410 break;
3411 }
3412 if (arginfo[i].align)
3413 this_badness += (exact_log2 (arginfo[i].align)
3414 - exact_log2 (n->simdclone->args[i].alignment));
3415 }
3416 if (i == (size_t) -1)
3417 continue;
3418 if (bestn == NULL || this_badness < badness)
3419 {
3420 bestn = n;
3421 badness = this_badness;
3422 }
3423 }
3424
3425 if (bestn == NULL)
00426f9a 3426 return false;
0136f8f0
AH
3427
3428 for (i = 0; i < nargs; i++)
3429 if ((arginfo[i].dt == vect_constant_def
3430 || arginfo[i].dt == vect_external_def)
3431 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3432 {
3433 arginfo[i].vectype
3434 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3435 i)));
3436 if (arginfo[i].vectype == NULL
cf1b2ba4 3437 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3438 > bestn->simdclone->simdlen))
00426f9a 3439 return false;
0136f8f0
AH
3440 }
3441
3442 fndecl = bestn->decl;
3443 nunits = bestn->simdclone->simdlen;
d9f21f6a 3444 ncopies = vf / nunits;
0136f8f0
AH
3445
3446  /* If the function isn't const, only allow it in simd loops where the user
3447 has asserted that at least nunits consecutive iterations can be
3448 performed using SIMD instructions. */
3449 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3450 && gimple_vuse (stmt))
00426f9a 3451 return false;
0136f8f0
AH
3452
3453 /* Sanity check: make sure that at least one copy of the vectorized stmt
3454 needs to be generated. */
3455 gcc_assert (ncopies >= 1);
3456
3457 if (!vec_stmt) /* transformation not required. */
3458 {
6c9e85fb
JJ
3459 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3460 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3461 if ((bestn->simdclone->args[i].arg_type
3462 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3463 || (bestn->simdclone->args[i].arg_type
3464 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3465 {
17b658af 3466 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3467 + 1);
3468 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3469 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3470 ? size_type_node : TREE_TYPE (arginfo[i].op);
3471 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3472 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3473 tree sll = arginfo[i].simd_lane_linear
3474 ? boolean_true_node : boolean_false_node;
3475 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3476 }
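      /* Editorial note (added commentary, not from the original sources):
	 after this loop STMT_VINFO_SIMD_CLONE_INFO holds the chosen clone's
	 decl in slot 0 and, for each linear argument i, a triple in slots
	 i*3+1 .. i*3+3: the saved base value, the linear step as an
	 INTEGER_CST, and a boolean node saying whether the argument is
	 linear only within a simd lane.  The argument set-up loop earlier in
	 this function reads the same slots back when the transform is
	 performed.  */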
0136f8f0
AH
3477 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3478 if (dump_enabled_p ())
3479 dump_printf_loc (MSG_NOTE, vect_location,
3480 "=== vectorizable_simd_clone_call ===\n");
3481/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3482 return true;
3483 }
3484
67b8dbac 3485 /* Transform. */
0136f8f0
AH
3486
3487 if (dump_enabled_p ())
3488 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3489
3490 /* Handle def. */
3491 scalar_dest = gimple_call_lhs (stmt);
3492 vec_dest = NULL_TREE;
3493 rtype = NULL_TREE;
3494 ratype = NULL_TREE;
3495 if (scalar_dest)
3496 {
3497 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3498 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3499 if (TREE_CODE (rtype) == ARRAY_TYPE)
3500 {
3501 ratype = rtype;
3502 rtype = TREE_TYPE (ratype);
3503 }
3504 }
3505
3506 prev_stmt_info = NULL;
3507 for (j = 0; j < ncopies; ++j)
3508 {
3509 /* Build argument list for the vectorized call. */
3510 if (j == 0)
3511 vargs.create (nargs);
3512 else
3513 vargs.truncate (0);
3514
3515 for (i = 0; i < nargs; i++)
3516 {
3517 unsigned int k, l, m, o;
3518 tree atype;
3519 op = gimple_call_arg (stmt, i);
3520 switch (bestn->simdclone->args[i].arg_type)
3521 {
3522 case SIMD_CLONE_ARG_TYPE_VECTOR:
3523 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 3524 o = nunits / simd_clone_subparts (atype);
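	  /* Editorial note (added commentary, not from the original
	     sources): two shapes are handled below.  If the clone expects
	     narrower vectors than the loop's vectype (say the loop works on
	     V8SI but the clone argument is V4SI), each caller vector is
	     split into k pieces with BIT_FIELD_REF.  If the clone expects
	     wider vectors, k caller vectors are glued together with a
	     CONSTRUCTOR instead.  In both cases k is asserted to be a power
	     of two.  */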
0136f8f0
AH
3525 for (m = j * o; m < (j + 1) * o; m++)
3526 {
cf1b2ba4
RS
3527 if (simd_clone_subparts (atype)
3528 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 3529 {
73a699ae 3530 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
3531 k = (simd_clone_subparts (arginfo[i].vectype)
3532 / simd_clone_subparts (atype));
0136f8f0
AH
3533 gcc_assert ((k & (k - 1)) == 0);
3534 if (m == 0)
3535 vec_oprnd0
81c40241 3536 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3537 else
3538 {
3539 vec_oprnd0 = arginfo[i].op;
3540 if ((m & (k - 1)) == 0)
3541 vec_oprnd0
3542 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3543 vec_oprnd0);
3544 }
3545 arginfo[i].op = vec_oprnd0;
3546 vec_oprnd0
3547 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 3548 bitsize_int (prec),
0136f8f0
AH
3549 bitsize_int ((m & (k - 1)) * prec));
3550 new_stmt
b731b390 3551 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3552 vec_oprnd0);
3553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3554 vargs.safe_push (gimple_assign_lhs (new_stmt));
3555 }
3556 else
3557 {
cf1b2ba4
RS
3558 k = (simd_clone_subparts (atype)
3559 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
3560 gcc_assert ((k & (k - 1)) == 0);
3561 vec<constructor_elt, va_gc> *ctor_elts;
3562 if (k != 1)
3563 vec_alloc (ctor_elts, k);
3564 else
3565 ctor_elts = NULL;
3566 for (l = 0; l < k; l++)
3567 {
3568 if (m == 0 && l == 0)
3569 vec_oprnd0
81c40241 3570 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3571 else
3572 vec_oprnd0
3573 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3574 arginfo[i].op);
3575 arginfo[i].op = vec_oprnd0;
3576 if (k == 1)
3577 break;
3578 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3579 vec_oprnd0);
3580 }
3581 if (k == 1)
3582 vargs.safe_push (vec_oprnd0);
3583 else
3584 {
3585 vec_oprnd0 = build_constructor (atype, ctor_elts);
3586 new_stmt
b731b390 3587 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3588 vec_oprnd0);
3589 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3590 vargs.safe_push (gimple_assign_lhs (new_stmt));
3591 }
3592 }
3593 }
3594 break;
3595 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3596 vargs.safe_push (op);
3597 break;
3598 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3599 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3600 if (j == 0)
3601 {
3602 gimple_seq stmts;
3603 arginfo[i].op
3604 = force_gimple_operand (arginfo[i].op, &stmts, true,
3605 NULL_TREE);
3606 if (stmts != NULL)
3607 {
3608 basic_block new_bb;
3609 edge pe = loop_preheader_edge (loop);
3610 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3611 gcc_assert (!new_bb);
3612 }
17b658af
JJ
3613 if (arginfo[i].simd_lane_linear)
3614 {
3615 vargs.safe_push (arginfo[i].op);
3616 break;
3617 }
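	      /* Editorial note (added commentary, not from the original
		 sources): otherwise the linear argument is materialized as
		 an induction variable of its own: a PHI in the loop header
		 that starts at the saved base and is bumped by
		 linear_step * nunits * ncopies once per vectorized loop
		 iteration.  Copy j == 0 passes the PHI result directly; the
		 else branch further down handles copies j > 0 by adding
		 linear_step * nunits * j on top of it.  */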
b731b390 3618 tree phi_res = copy_ssa_name (op);
538dd0b7 3619 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3620 set_vinfo_for_stmt (new_phi,
310213d4 3621 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3622 add_phi_arg (new_phi, arginfo[i].op,
3623 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3624 enum tree_code code
3625 = POINTER_TYPE_P (TREE_TYPE (op))
3626 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3627 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3628 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3629 widest_int cst
3630 = wi::mul (bestn->simdclone->args[i].linear_step,
3631 ncopies * nunits);
3632 tree tcst = wide_int_to_tree (type, cst);
b731b390 3633 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
3634 new_stmt
3635 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
3636 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3637 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3638 set_vinfo_for_stmt (new_stmt,
310213d4 3639 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
3640 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3641 UNKNOWN_LOCATION);
3642 arginfo[i].op = phi_res;
3643 vargs.safe_push (phi_res);
3644 }
3645 else
3646 {
3647 enum tree_code code
3648 = POINTER_TYPE_P (TREE_TYPE (op))
3649 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3650 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3651 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3652 widest_int cst
3653 = wi::mul (bestn->simdclone->args[i].linear_step,
3654 j * nunits);
3655 tree tcst = wide_int_to_tree (type, cst);
b731b390 3656 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
3657 new_stmt = gimple_build_assign (new_temp, code,
3658 arginfo[i].op, tcst);
0136f8f0
AH
3659 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3660 vargs.safe_push (new_temp);
3661 }
3662 break;
7adb26f2
JJ
3663 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3664 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 3665 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
3666 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3667 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3668 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3669 default:
3670 gcc_unreachable ();
3671 }
3672 }
3673
3674 new_stmt = gimple_build_call_vec (fndecl, vargs);
3675 if (vec_dest)
3676 {
cf1b2ba4 3677 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 3678 if (ratype)
b731b390 3679 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
3680 else if (simd_clone_subparts (vectype)
3681 == simd_clone_subparts (rtype))
0136f8f0
AH
3682 new_temp = make_ssa_name (vec_dest, new_stmt);
3683 else
3684 new_temp = make_ssa_name (rtype, new_stmt);
3685 gimple_call_set_lhs (new_stmt, new_temp);
3686 }
3687 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3688
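      /* Editorial note (added commentary, not from the original sources):
	 the result of the clone call is now reshaped to the loop's vectype.
	 If the clone returns more elements than one caller vector holds, the
	 returned array or wide vector is cut into pieces with MEM_REF /
	 BIT_FIELD_REF and the array temporary is clobbered afterwards.  If
	 it returns fewer elements, the results of several consecutive calls
	 are collected and combined into one vector with a CONSTRUCTOR.  A
	 plain array return of exactly one vector is simply loaded back with
	 a MEM_REF.  */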
3689 if (vec_dest)
3690 {
cf1b2ba4 3691 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
3692 {
3693 unsigned int k, l;
73a699ae
RS
3694 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3695 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 3696 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
3697 gcc_assert ((k & (k - 1)) == 0);
3698 for (l = 0; l < k; l++)
3699 {
3700 tree t;
3701 if (ratype)
3702 {
3703 t = build_fold_addr_expr (new_temp);
3704 t = build2 (MEM_REF, vectype, t,
73a699ae 3705 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
3706 }
3707 else
3708 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 3709 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 3710 new_stmt
b731b390 3711 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
3712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3713 if (j == 0 && l == 0)
3714 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3715 else
3716 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3717
3718 prev_stmt_info = vinfo_for_stmt (new_stmt);
3719 }
3720
3721 if (ratype)
3722 {
3723 tree clobber = build_constructor (ratype, NULL);
3724 TREE_THIS_VOLATILE (clobber) = 1;
3725 new_stmt = gimple_build_assign (new_temp, clobber);
3726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3727 }
3728 continue;
3729 }
cf1b2ba4 3730 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 3731 {
cf1b2ba4
RS
3732 unsigned int k = (simd_clone_subparts (vectype)
3733 / simd_clone_subparts (rtype));
0136f8f0
AH
3734 gcc_assert ((k & (k - 1)) == 0);
3735 if ((j & (k - 1)) == 0)
3736 vec_alloc (ret_ctor_elts, k);
3737 if (ratype)
3738 {
cf1b2ba4 3739 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
3740 for (m = 0; m < o; m++)
3741 {
3742 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3743 size_int (m), NULL_TREE, NULL_TREE);
3744 new_stmt
b731b390 3745 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
3746 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3747 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3748 gimple_assign_lhs (new_stmt));
3749 }
3750 tree clobber = build_constructor (ratype, NULL);
3751 TREE_THIS_VOLATILE (clobber) = 1;
3752 new_stmt = gimple_build_assign (new_temp, clobber);
3753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3754 }
3755 else
3756 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3757 if ((j & (k - 1)) != k - 1)
3758 continue;
3759 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3760 new_stmt
b731b390 3761 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
3762 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3763
3764 if ((unsigned) j == k - 1)
3765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3766 else
3767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3768
3769 prev_stmt_info = vinfo_for_stmt (new_stmt);
3770 continue;
3771 }
3772 else if (ratype)
3773 {
3774 tree t = build_fold_addr_expr (new_temp);
3775 t = build2 (MEM_REF, vectype, t,
3776 build_int_cst (TREE_TYPE (t), 0));
3777 new_stmt
b731b390 3778 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
3779 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3780 tree clobber = build_constructor (ratype, NULL);
3781 TREE_THIS_VOLATILE (clobber) = 1;
3782 vect_finish_stmt_generation (stmt,
3783 gimple_build_assign (new_temp,
3784 clobber), gsi);
3785 }
3786 }
3787
3788 if (j == 0)
3789 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3790 else
3791 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3792
3793 prev_stmt_info = vinfo_for_stmt (new_stmt);
3794 }
3795
3796 vargs.release ();
3797
3798  /* The call in STMT might prevent it from being removed in dce.
3799     However, we cannot remove it here, due to the way the ssa name
3800     it defines is mapped to the new definition.  So just replace the
3801     rhs of the statement with something harmless.  */
3802
3803 if (slp_node)
3804 return true;
3805
3806 if (scalar_dest)
3807 {
3808 type = TREE_TYPE (scalar_dest);
3809 if (is_pattern_stmt_p (stmt_info))
3810 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3811 else
3812 lhs = gimple_call_lhs (stmt);
3813 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3814 }
3815 else
3816 new_stmt = gimple_build_nop ();
3817 set_vinfo_for_stmt (new_stmt, stmt_info);
3818 set_vinfo_for_stmt (stmt, NULL);
3819 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3820 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
3821 unlink_stmt_vdef (stmt);
3822
3823 return true;
3824}
3825
3826
ebfd146a
IR
3827/* Function vect_gen_widened_results_half
3828
3829   Create a vector stmt whose code, number of arguments, and result
b8698a0f 3830 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 3831 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
3832 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3833 needs to be created (DECL is a function-decl of a target-builtin).
3834 STMT is the original scalar stmt that we are vectorizing. */
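/* Editorial illustration (added commentary, not from the original sources):
   a widening operation produces results twice as wide as its operands, so
   each operand vector yields two result vectors.  For instance, widening a
   V8HI operand to V4SI results is emitted as two stmts, one per half, e.g.
   using VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR (or two calls to a target
   builtin when CODE is CALL_EXPR).  This helper builds one such half.  */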
3835
355fe088 3836static gimple *
ebfd146a
IR
3837vect_gen_widened_results_half (enum tree_code code,
3838 tree decl,
3839 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3840 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 3841 gimple *stmt)
b8698a0f 3842{
355fe088 3843 gimple *new_stmt;
b8698a0f
L
3844 tree new_temp;
3845
3846 /* Generate half of the widened result: */
3847 if (code == CALL_EXPR)
3848 {
3849 /* Target specific support */
ebfd146a
IR
3850 if (op_type == binary_op)
3851 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3852 else
3853 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3854 new_temp = make_ssa_name (vec_dest, new_stmt);
3855 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3856 }
3857 else
ebfd146a 3858 {
b8698a0f
L
3859 /* Generic support */
3860 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3861 if (op_type != binary_op)
3862 vec_oprnd1 = NULL;
0d0e4a03 3863 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3864 new_temp = make_ssa_name (vec_dest, new_stmt);
3865 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3866 }
ebfd146a
IR
3867 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3868
ebfd146a
IR
3869 return new_stmt;
3870}
3871
4a00c761
JJ
3872
3873/* Get vectorized definitions for loop-based vectorization. For the first
3874 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3875 scalar operand), and for the rest we get a copy with
3876 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3877 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3878 The vectors are collected into VEC_OPRNDS. */
3879
3880static void
355fe088 3881vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 3882 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3883{
3884 tree vec_oprnd;
3885
3886 /* Get first vector operand. */
3887 /* All the vector operands except the very first one (that is scalar oprnd)
3888 are stmt copies. */
3889 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 3890 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
3891 else
3892 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3893
9771b263 3894 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3895
3896 /* Get second vector operand. */
3897 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3898 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
3899
3900 *oprnd = vec_oprnd;
3901
3902 /* For conversion in multiple steps, continue to get operands
3903 recursively. */
3904 if (multi_step_cvt)
3905 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3906}
3907
3908
3909/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3910 For multi-step conversions store the resulting vectors and call the function
3911 recursively. */
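/* Editorial illustration (added commentary, not from the original sources):
   for a two-step demotion such as int -> char with V4SI inputs and a V16QI
   result, the first level packs the four V4SI operands pairwise into two
   V8HI vectors, and the recursive call packs those into the final V16QI
   vector, each level applying the pack operation to adjacent pairs.  */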
3912
3913static void
9771b263 3914vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 3915 int multi_step_cvt, gimple *stmt,
9771b263 3916 vec<tree> vec_dsts,
4a00c761
JJ
3917 gimple_stmt_iterator *gsi,
3918 slp_tree slp_node, enum tree_code code,
3919 stmt_vec_info *prev_stmt_info)
3920{
3921 unsigned int i;
3922 tree vop0, vop1, new_tmp, vec_dest;
355fe088 3923 gimple *new_stmt;
4a00c761
JJ
3924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3925
9771b263 3926 vec_dest = vec_dsts.pop ();
4a00c761 3927
9771b263 3928 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
3929 {
3930 /* Create demotion operation. */
9771b263
DN
3931 vop0 = (*vec_oprnds)[i];
3932 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 3933 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
3934 new_tmp = make_ssa_name (vec_dest, new_stmt);
3935 gimple_assign_set_lhs (new_stmt, new_tmp);
3936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3937
3938 if (multi_step_cvt)
3939 /* Store the resulting vector for next recursive call. */
9771b263 3940 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
3941 else
3942 {
3943 /* This is the last step of the conversion sequence. Store the
3944 vectors in SLP_NODE or in vector info of the scalar statement
3945 (or in STMT_VINFO_RELATED_STMT chain). */
3946 if (slp_node)
9771b263 3947 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 3948 else
c689ce1e
RB
3949 {
3950 if (!*prev_stmt_info)
3951 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3952 else
3953 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 3954
c689ce1e
RB
3955 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3956 }
4a00c761
JJ
3957 }
3958 }
3959
3960 /* For multi-step demotion operations we first generate demotion operations
3961 from the source type to the intermediate types, and then combine the
3962 results (stored in VEC_OPRNDS) in demotion operation to the destination
3963 type. */
3964 if (multi_step_cvt)
3965 {
3966 /* At each level of recursion we have half of the operands we had at the
3967 previous level. */
9771b263 3968 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
3969 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3970 stmt, vec_dsts, gsi, slp_node,
3971 VEC_PACK_TRUNC_EXPR,
3972 prev_stmt_info);
3973 }
3974
9771b263 3975 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
3976}
3977
3978
3979/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3980 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3981 the resulting vectors and call the function recursively. */
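/* Editorial illustration (added commentary, not from the original sources):
   each input vector is widened into two output vectors, one for its low half
   and one for its high half (CODE1 and CODE2, e.g. VEC_UNPACK_LO_EXPR /
   VEC_UNPACK_HI_EXPR), so VEC_OPRNDS0 doubles in length on every step.
   Promoting V8HI operands to V4SI, for example, turns one operand vector
   into two result vectors.  */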
3982
3983static void
9771b263
DN
3984vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3985 vec<tree> *vec_oprnds1,
355fe088 3986 gimple *stmt, tree vec_dest,
4a00c761
JJ
3987 gimple_stmt_iterator *gsi,
3988 enum tree_code code1,
3989 enum tree_code code2, tree decl1,
3990 tree decl2, int op_type)
3991{
3992 int i;
3993 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 3994 gimple *new_stmt1, *new_stmt2;
6e1aa848 3995 vec<tree> vec_tmp = vNULL;
4a00c761 3996
9771b263
DN
3997 vec_tmp.create (vec_oprnds0->length () * 2);
3998 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
3999 {
4000 if (op_type == binary_op)
9771b263 4001 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4002 else
4003 vop1 = NULL_TREE;
4004
4005 /* Generate the two halves of promotion operation. */
4006 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4007 op_type, vec_dest, gsi, stmt);
4008 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4009 op_type, vec_dest, gsi, stmt);
4010 if (is_gimple_call (new_stmt1))
4011 {
4012 new_tmp1 = gimple_call_lhs (new_stmt1);
4013 new_tmp2 = gimple_call_lhs (new_stmt2);
4014 }
4015 else
4016 {
4017 new_tmp1 = gimple_assign_lhs (new_stmt1);
4018 new_tmp2 = gimple_assign_lhs (new_stmt2);
4019 }
4020
4021 /* Store the results for the next step. */
9771b263
DN
4022 vec_tmp.quick_push (new_tmp1);
4023 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4024 }
4025
689eaba3 4026 vec_oprnds0->release ();
4a00c761
JJ
4027 *vec_oprnds0 = vec_tmp;
4028}
4029
4030
b8698a0f
L
4031/* Check if STMT performs a conversion operation, that can be vectorized.
4032 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4033 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4034 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4035
4036static bool
355fe088
TS
4037vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4038 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4039{
4040 tree vec_dest;
4041 tree scalar_dest;
4a00c761 4042 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4043 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4044 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4045 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4046 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4047 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4048 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4049 tree new_temp;
355fe088 4050 gimple *def_stmt;
ebfd146a 4051 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4052 int ndts = 2;
355fe088 4053 gimple *new_stmt = NULL;
ebfd146a 4054 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4055 poly_uint64 nunits_in;
4056 poly_uint64 nunits_out;
ebfd146a 4057 tree vectype_out, vectype_in;
4a00c761
JJ
4058 int ncopies, i, j;
4059 tree lhs_type, rhs_type;
ebfd146a 4060 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4061 vec<tree> vec_oprnds0 = vNULL;
4062 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4063 tree vop0;
4a00c761 4064 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4065 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4066 int multi_step_cvt = 0;
6e1aa848 4067 vec<tree> interm_types = vNULL;
4a00c761
JJ
4068 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4069 int op_type;
4a00c761 4070 unsigned short fltsz;
ebfd146a
IR
4071
4072 /* Is STMT a vectorizable conversion? */
4073
4a00c761 4074 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4075 return false;
4076
66c16fd9
RB
4077 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4078 && ! vec_stmt)
ebfd146a
IR
4079 return false;
4080
4081 if (!is_gimple_assign (stmt))
4082 return false;
4083
4084 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4085 return false;
4086
4087 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4088 if (!CONVERT_EXPR_CODE_P (code)
4089 && code != FIX_TRUNC_EXPR
4090 && code != FLOAT_EXPR
4091 && code != WIDEN_MULT_EXPR
4092 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4093 return false;
4094
4a00c761
JJ
4095 op_type = TREE_CODE_LENGTH (code);
4096
ebfd146a 4097 /* Check types of lhs and rhs. */
b690cc0f 4098 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4099 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4100 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4101
ebfd146a
IR
4102 op0 = gimple_assign_rhs1 (stmt);
4103 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4104
4105 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4106 && !((INTEGRAL_TYPE_P (lhs_type)
4107 && INTEGRAL_TYPE_P (rhs_type))
4108 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4109 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4110 return false;
4111
e6f5c25d
IE
4112 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4113 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4114 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4115 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4116 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4117 {
73fbfcad 4118 if (dump_enabled_p ())
78c60e3d 4119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4120 "type conversion to/from bit-precision unsupported."
4121 "\n");
4a00c761
JJ
4122 return false;
4123 }
4124
b690cc0f 4125 /* Check the operands of the operation. */
81c40241 4126 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4127 {
73fbfcad 4128 if (dump_enabled_p ())
78c60e3d 4129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4130 "use not simple.\n");
b690cc0f
RG
4131 return false;
4132 }
4a00c761
JJ
4133 if (op_type == binary_op)
4134 {
4135 bool ok;
4136
4137 op1 = gimple_assign_rhs2 (stmt);
4138 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4139 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4140 OP1. */
4141 if (CONSTANT_CLASS_P (op0))
81c40241 4142 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4143 else
81c40241 4144 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4145
4146 if (!ok)
4147 {
73fbfcad 4148 if (dump_enabled_p ())
78c60e3d 4149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4150 "use not simple.\n");
4a00c761
JJ
4151 return false;
4152 }
4153 }
4154
b690cc0f
RG
4155 /* If op0 is an external or constant defs use a vector type of
4156 the same size as the output vector type. */
ebfd146a 4157 if (!vectype_in)
b690cc0f 4158 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4159 if (vec_stmt)
4160 gcc_assert (vectype_in);
4161 if (!vectype_in)
4162 {
73fbfcad 4163 if (dump_enabled_p ())
4a00c761 4164 {
78c60e3d
SS
4165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4166 "no vectype for scalar type ");
4167 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4168 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4169 }
7d8930a0
IR
4170
4171 return false;
4172 }
ebfd146a 4173
e6f5c25d
IE
4174 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4175 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4176 {
4177 if (dump_enabled_p ())
4178 {
4179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4180			 "can't convert between boolean and non-"
4181 "boolean vectors");
4182 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4183 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4184 }
4185
4186 return false;
4187 }
4188
b690cc0f
RG
4189 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4190 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4191 if (known_eq (nunits_out, nunits_in))
ebfd146a 4192 modifier = NONE;
062d5ccc
RS
4193 else if (multiple_p (nunits_out, nunits_in))
4194 modifier = NARROW;
ebfd146a 4195 else
062d5ccc
RS
4196 {
4197 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4198 modifier = WIDEN;
4199 }
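  /* Editorial note (added commentary, not from the original sources):
     MODIFIER now describes the shape of the conversion.  NONE means the
     element counts match (e.g. V4SI <-> V4SF), WIDEN means the result has
     fewer, wider elements per vector (e.g. V8HI -> V4SI), and NARROW means
     it has more, narrower elements (e.g. V4SI -> V8HI).  */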
ebfd146a 4200
ff802fa1
IR
4201 /* Multiple types in SLP are handled by creating the appropriate number of
4202 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4203 case of SLP. */
fce57248 4204 if (slp_node)
ebfd146a 4205 ncopies = 1;
4a00c761 4206 else if (modifier == NARROW)
e8f142e2 4207 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4208 else
e8f142e2 4209 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4210
ebfd146a
IR
4211 /* Sanity check: make sure that at least one copy of the vectorized stmt
4212 needs to be generated. */
4213 gcc_assert (ncopies >= 1);
4214
16d22000
RS
4215 bool found_mode = false;
4216 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4217 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4218 opt_scalar_mode rhs_mode_iter;
b397965c 4219
ebfd146a 4220 /* Supportable by target? */
4a00c761 4221 switch (modifier)
ebfd146a 4222 {
4a00c761
JJ
4223 case NONE:
4224 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4225 return false;
4226 if (supportable_convert_operation (code, vectype_out, vectype_in,
4227 &decl1, &code1))
4228 break;
4229 /* FALLTHRU */
4230 unsupported:
73fbfcad 4231 if (dump_enabled_p ())
78c60e3d 4232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4233 "conversion not supported by target.\n");
ebfd146a 4234 return false;
ebfd146a 4235
4a00c761
JJ
4236 case WIDEN:
4237 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4238 &code1, &code2, &multi_step_cvt,
4239 &interm_types))
4a00c761
JJ
4240 {
4241 /* Binary widening operation can only be supported directly by the
4242 architecture. */
4243 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4244 break;
4245 }
4246
4247 if (code != FLOAT_EXPR
b397965c 4248 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4249 goto unsupported;
4250
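      /* Editorial note (added commentary, not from the original sources):
	 no direct widening is available, but for FLOAT_EXPR with a narrow
	 integer source the loop below tries to go through an intermediate
	 integer type no wider than the result.  For example, short -> double
	 may be done as a widening short -> int (or long) step followed by an
	 int -> double conversion, with cvt_type holding the intermediate
	 integer vector type.  */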
b397965c 4251 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4252 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4253 {
16d22000 4254 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4255 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4256 break;
4257
4a00c761
JJ
4258 cvt_type
4259 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4260 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4261 if (cvt_type == NULL_TREE)
4262 goto unsupported;
4263
4264 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4265 {
4266 if (!supportable_convert_operation (code, vectype_out,
4267 cvt_type, &decl1, &codecvt1))
4268 goto unsupported;
4269 }
4270 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4271 cvt_type, &codecvt1,
4272 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4273 &interm_types))
4274 continue;
4275 else
4276 gcc_assert (multi_step_cvt == 0);
4277
4278 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4279 vectype_in, &code1, &code2,
4280 &multi_step_cvt, &interm_types))
16d22000
RS
4281 {
4282 found_mode = true;
4283 break;
4284 }
4a00c761
JJ
4285 }
4286
16d22000 4287 if (!found_mode)
4a00c761
JJ
4288 goto unsupported;
4289
4290 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4291 codecvt2 = ERROR_MARK;
4292 else
4293 {
4294 multi_step_cvt++;
9771b263 4295 interm_types.safe_push (cvt_type);
4a00c761
JJ
4296 cvt_type = NULL_TREE;
4297 }
4298 break;
4299
4300 case NARROW:
4301 gcc_assert (op_type == unary_op);
4302 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4303 &code1, &multi_step_cvt,
4304 &interm_types))
4305 break;
4306
4307 if (code != FIX_TRUNC_EXPR
b397965c 4308 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4309 goto unsupported;
4310
4a00c761
JJ
4311 cvt_type
4312 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4313 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4314 if (cvt_type == NULL_TREE)
4315 goto unsupported;
4316 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4317 &decl1, &codecvt1))
4318 goto unsupported;
4319 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4320 &code1, &multi_step_cvt,
4321 &interm_types))
4322 break;
4323 goto unsupported;
4324
4325 default:
4326 gcc_unreachable ();
ebfd146a
IR
4327 }
4328
4329 if (!vec_stmt) /* transformation not required. */
4330 {
73fbfcad 4331 if (dump_enabled_p ())
78c60e3d 4332 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4333 "=== vectorizable_conversion ===\n");
4a00c761 4334 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4335 {
4336 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4fc5ebf1 4337 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
8bd37302 4338 }
4a00c761
JJ
4339 else if (modifier == NARROW)
4340 {
4341 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 4342 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
4343 }
4344 else
4345 {
4346 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 4347 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 4348 }
9771b263 4349 interm_types.release ();
ebfd146a
IR
4350 return true;
4351 }
4352
67b8dbac 4353 /* Transform. */
73fbfcad 4354 if (dump_enabled_p ())
78c60e3d 4355 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4356 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4357
4a00c761
JJ
4358 if (op_type == binary_op)
4359 {
4360 if (CONSTANT_CLASS_P (op0))
4361 op0 = fold_convert (TREE_TYPE (op1), op0);
4362 else if (CONSTANT_CLASS_P (op1))
4363 op1 = fold_convert (TREE_TYPE (op0), op1);
4364 }
4365
4366 /* In case of multi-step conversion, we first generate conversion operations
4367     to the intermediate types, and then from those types to the final one.
4368 We create vector destinations for the intermediate type (TYPES) received
4369 from supportable_*_operation, and store them in the correct order
4370 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4371 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4372 vec_dest = vect_create_destination_var (scalar_dest,
4373 (cvt_type && modifier == WIDEN)
4374 ? cvt_type : vectype_out);
9771b263 4375 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4376
4377 if (multi_step_cvt)
4378 {
9771b263
DN
4379 for (i = interm_types.length () - 1;
4380 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4381 {
4382 vec_dest = vect_create_destination_var (scalar_dest,
4383 intermediate_type);
9771b263 4384 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4385 }
4386 }
ebfd146a 4387
4a00c761 4388 if (cvt_type)
82294ec1
JJ
4389 vec_dest = vect_create_destination_var (scalar_dest,
4390 modifier == WIDEN
4391 ? vectype_out : cvt_type);
4a00c761
JJ
4392
4393 if (!slp_node)
4394 {
30862efc 4395 if (modifier == WIDEN)
4a00c761 4396 {
c3284718 4397 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4398 if (op_type == binary_op)
9771b263 4399 vec_oprnds1.create (1);
4a00c761 4400 }
30862efc 4401 else if (modifier == NARROW)
9771b263
DN
4402 vec_oprnds0.create (
4403 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4404 }
4405 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4406 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4407
4a00c761 4408 last_oprnd = op0;
ebfd146a
IR
4409 prev_stmt_info = NULL;
4410 switch (modifier)
4411 {
4412 case NONE:
4413 for (j = 0; j < ncopies; j++)
4414 {
ebfd146a 4415 if (j == 0)
306b0c92 4416 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4417 else
4418 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4419
9771b263 4420 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4421 {
4422 /* Arguments are ready, create the new vector stmt. */
4423 if (code1 == CALL_EXPR)
4424 {
4425 new_stmt = gimple_build_call (decl1, 1, vop0);
4426 new_temp = make_ssa_name (vec_dest, new_stmt);
4427 gimple_call_set_lhs (new_stmt, new_temp);
4428 }
4429 else
4430 {
4431 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4432 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4433 new_temp = make_ssa_name (vec_dest, new_stmt);
4434 gimple_assign_set_lhs (new_stmt, new_temp);
4435 }
4436
4437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4438 if (slp_node)
9771b263 4439 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4440 else
4441 {
4442 if (!prev_stmt_info)
4443 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4444 else
4445 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4446 prev_stmt_info = vinfo_for_stmt (new_stmt);
4447 }
4a00c761 4448 }
ebfd146a
IR
4449 }
4450 break;
4451
4452 case WIDEN:
4453 /* In case the vectorization factor (VF) is bigger than the number
4454 of elements that we can fit in a vectype (nunits), we have to
4455	 generate more than one vector stmt - i.e. - we need to "unroll"
4456 the vector stmt by a factor VF/nunits. */
4457 for (j = 0; j < ncopies; j++)
4458 {
4a00c761 4459 /* Handle uses. */
ebfd146a 4460 if (j == 0)
4a00c761
JJ
4461 {
4462 if (slp_node)
4463 {
4464 if (code == WIDEN_LSHIFT_EXPR)
4465 {
4466 unsigned int k;
ebfd146a 4467
4a00c761
JJ
4468 vec_oprnd1 = op1;
4469 /* Store vec_oprnd1 for every vector stmt to be created
4470 for SLP_NODE. We check during the analysis that all
4471 the shift arguments are the same. */
4472 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4473 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4474
4475 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4476 slp_node);
4a00c761
JJ
4477 }
4478 else
4479 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4480 &vec_oprnds1, slp_node);
4a00c761
JJ
4481 }
4482 else
4483 {
81c40241 4484 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4485 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4486 if (op_type == binary_op)
4487 {
4488 if (code == WIDEN_LSHIFT_EXPR)
4489 vec_oprnd1 = op1;
4490 else
81c40241 4491 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4492 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4493 }
4494 }
4495 }
ebfd146a 4496 else
4a00c761
JJ
4497 {
4498 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
4499 vec_oprnds0.truncate (0);
4500 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4501 if (op_type == binary_op)
4502 {
4503 if (code == WIDEN_LSHIFT_EXPR)
4504 vec_oprnd1 = op1;
4505 else
4506 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4507 vec_oprnd1);
9771b263
DN
4508 vec_oprnds1.truncate (0);
4509 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4510 }
4511 }
ebfd146a 4512
4a00c761
JJ
4513 /* Arguments are ready. Create the new vector stmts. */
4514 for (i = multi_step_cvt; i >= 0; i--)
4515 {
9771b263 4516 tree this_dest = vec_dsts[i];
4a00c761
JJ
4517 enum tree_code c1 = code1, c2 = code2;
4518 if (i == 0 && codecvt2 != ERROR_MARK)
4519 {
4520 c1 = codecvt1;
4521 c2 = codecvt2;
4522 }
4523 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4524 &vec_oprnds1,
4525 stmt, this_dest, gsi,
4526 c1, c2, decl1, decl2,
4527 op_type);
4528 }
4529
9771b263 4530 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4531 {
4532 if (cvt_type)
4533 {
4534 if (codecvt1 == CALL_EXPR)
4535 {
4536 new_stmt = gimple_build_call (decl1, 1, vop0);
4537 new_temp = make_ssa_name (vec_dest, new_stmt);
4538 gimple_call_set_lhs (new_stmt, new_temp);
4539 }
4540 else
4541 {
4542 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4543 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4544 new_stmt = gimple_build_assign (new_temp, codecvt1,
4545 vop0);
4a00c761
JJ
4546 }
4547
4548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4549 }
4550 else
4551 new_stmt = SSA_NAME_DEF_STMT (vop0);
4552
4553 if (slp_node)
9771b263 4554 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4555 else
c689ce1e
RB
4556 {
4557 if (!prev_stmt_info)
4558 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4559 else
4560 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4561 prev_stmt_info = vinfo_for_stmt (new_stmt);
4562 }
4a00c761 4563 }
ebfd146a 4564 }
4a00c761
JJ
4565
4566 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4567 break;
4568
4569 case NARROW:
4570 /* In case the vectorization factor (VF) is bigger than the number
4571 of elements that we can fit in a vectype (nunits), we have to
4572	 generate more than one vector stmt - i.e. - we need to "unroll"
4573 the vector stmt by a factor VF/nunits. */
4574 for (j = 0; j < ncopies; j++)
4575 {
4576 /* Handle uses. */
4a00c761
JJ
4577 if (slp_node)
4578 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4579 slp_node);
ebfd146a
IR
4580 else
4581 {
9771b263 4582 vec_oprnds0.truncate (0);
4a00c761
JJ
4583 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4584 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
4585 }
4586
4a00c761
JJ
4587 /* Arguments are ready. Create the new vector stmts. */
4588 if (cvt_type)
9771b263 4589 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4590 {
4591 if (codecvt1 == CALL_EXPR)
4592 {
4593 new_stmt = gimple_build_call (decl1, 1, vop0);
4594 new_temp = make_ssa_name (vec_dest, new_stmt);
4595 gimple_call_set_lhs (new_stmt, new_temp);
4596 }
4597 else
4598 {
4599 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4600 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4601 new_stmt = gimple_build_assign (new_temp, codecvt1,
4602 vop0);
4a00c761 4603 }
ebfd146a 4604
4a00c761 4605 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4606 vec_oprnds0[i] = new_temp;
4a00c761 4607 }
ebfd146a 4608
4a00c761
JJ
4609 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4610 stmt, vec_dsts, gsi,
4611 slp_node, code1,
4612 &prev_stmt_info);
ebfd146a
IR
4613 }
4614
4615 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4616 break;
ebfd146a
IR
4617 }
4618
9771b263
DN
4619 vec_oprnds0.release ();
4620 vec_oprnds1.release ();
9771b263 4621 interm_types.release ();
ebfd146a
IR
4622
4623 return true;
4624}
ff802fa1
IR
4625
4626
ebfd146a
IR
4627/* Function vectorizable_assignment.
4628
b8698a0f
L
4629 Check if STMT performs an assignment (copy) that can be vectorized.
4630 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4631 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4632 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4633
4634static bool
355fe088
TS
4635vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4636 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4637{
4638 tree vec_dest;
4639 tree scalar_dest;
4640 tree op;
4641 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
4642 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4643 tree new_temp;
355fe088 4644 gimple *def_stmt;
4fc5ebf1
JG
4645 enum vect_def_type dt[1] = {vect_unknown_def_type};
4646 int ndts = 1;
ebfd146a 4647 int ncopies;
f18b55bd 4648 int i, j;
6e1aa848 4649 vec<tree> vec_oprnds = vNULL;
ebfd146a 4650 tree vop;
a70d6342 4651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4652 vec_info *vinfo = stmt_info->vinfo;
355fe088 4653 gimple *new_stmt = NULL;
f18b55bd 4654 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
4655 enum tree_code code;
4656 tree vectype_in;
ebfd146a 4657
a70d6342 4658 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4659 return false;
4660
66c16fd9
RB
4661 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4662 && ! vec_stmt)
ebfd146a
IR
4663 return false;
4664
4665 /* Is vectorizable assignment? */
4666 if (!is_gimple_assign (stmt))
4667 return false;
4668
4669 scalar_dest = gimple_assign_lhs (stmt);
4670 if (TREE_CODE (scalar_dest) != SSA_NAME)
4671 return false;
4672
fde9c428 4673 code = gimple_assign_rhs_code (stmt);
ebfd146a 4674 if (gimple_assign_single_p (stmt)
fde9c428
RG
4675 || code == PAREN_EXPR
4676 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
4677 op = gimple_assign_rhs1 (stmt);
4678 else
4679 return false;
4680
7b7ec6c5
RG
4681 if (code == VIEW_CONVERT_EXPR)
4682 op = TREE_OPERAND (op, 0);
4683
465c8c19 4684 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 4685 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
4686
4687 /* Multiple types in SLP are handled by creating the appropriate number of
4688 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4689 case of SLP. */
fce57248 4690 if (slp_node)
465c8c19
JJ
4691 ncopies = 1;
4692 else
e8f142e2 4693 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
4694
4695 gcc_assert (ncopies >= 1);
4696
81c40241 4697 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 4698 {
73fbfcad 4699 if (dump_enabled_p ())
78c60e3d 4700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4701 "use not simple.\n");
ebfd146a
IR
4702 return false;
4703 }
4704
fde9c428
RG
4705 /* We can handle NOP_EXPR conversions that do not change the number
4706 of elements or the vector size. */
7b7ec6c5
RG
4707 if ((CONVERT_EXPR_CODE_P (code)
4708 || code == VIEW_CONVERT_EXPR)
fde9c428 4709 && (!vectype_in
928686b1 4710 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
4711 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
4712 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
4713 return false;
4714
7b7b1813
RG
4715 /* We do not handle bit-precision changes. */
4716 if ((CONVERT_EXPR_CODE_P (code)
4717 || code == VIEW_CONVERT_EXPR)
4718 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
4719 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4720 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
4721 /* But a conversion that does not change the bit-pattern is ok. */
4722 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4723 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
4724 && TYPE_UNSIGNED (TREE_TYPE (op)))
4725 /* Conversion between boolean types of different sizes is
4726 a simple assignment in case their vectypes are same
4727 boolean vectors. */
4728 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4729 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4730 {
73fbfcad 4731 if (dump_enabled_p ())
78c60e3d
SS
4732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4733 "type conversion to/from bit-precision "
e645e942 4734 "unsupported.\n");
7b7b1813
RG
4735 return false;
4736 }
4737
ebfd146a
IR
4738 if (!vec_stmt) /* transformation not required. */
4739 {
4740 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4741 if (dump_enabled_p ())
78c60e3d 4742 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4743 "=== vectorizable_assignment ===\n");
4fc5ebf1 4744 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
4745 return true;
4746 }
4747
67b8dbac 4748 /* Transform. */
73fbfcad 4749 if (dump_enabled_p ())
e645e942 4750 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
4751
4752 /* Handle def. */
4753 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4754
4755 /* Handle use. */
f18b55bd 4756 for (j = 0; j < ncopies; j++)
ebfd146a 4757 {
f18b55bd
IR
4758 /* Handle uses. */
4759 if (j == 0)
306b0c92 4760 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
4761 else
4762 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4763
4764 /* Arguments are ready. create the new vector stmt. */
9771b263 4765 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4766 {
7b7ec6c5
RG
4767 if (CONVERT_EXPR_CODE_P (code)
4768 || code == VIEW_CONVERT_EXPR)
4a73490d 4769 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
4770 new_stmt = gimple_build_assign (vec_dest, vop);
4771 new_temp = make_ssa_name (vec_dest, new_stmt);
4772 gimple_assign_set_lhs (new_stmt, new_temp);
4773 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4774 if (slp_node)
9771b263 4775 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4776 }
ebfd146a
IR
4777
4778 if (slp_node)
f18b55bd
IR
4779 continue;
4780
4781 if (j == 0)
4782 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4783 else
4784 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4785
4786 prev_stmt_info = vinfo_for_stmt (new_stmt);
4787 }
b8698a0f 4788
9771b263 4789 vec_oprnds.release ();
ebfd146a
IR
4790 return true;
4791}
4792
9dc3f7de 4793
1107f3ae
IR
4794/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4795 either as shift by a scalar or by a vector. */
4796
4797bool
4798vect_supportable_shift (enum tree_code code, tree scalar_type)
4799{
4800
ef4bddc2 4801 machine_mode vec_mode;
1107f3ae
IR
4802 optab optab;
4803 int icode;
4804 tree vectype;
4805
4806 vectype = get_vectype_for_scalar_type (scalar_type);
4807 if (!vectype)
4808 return false;
4809
4810 optab = optab_for_tree_code (code, vectype, optab_scalar);
4811 if (!optab
4812 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4813 {
4814 optab = optab_for_tree_code (code, vectype, optab_vector);
4815 if (!optab
4816 || (optab_handler (optab, TYPE_MODE (vectype))
4817 == CODE_FOR_nothing))
4818 return false;
4819 }
4820
4821 vec_mode = TYPE_MODE (vectype);
4822 icode = (int) optab_handler (optab, vec_mode);
4823 if (icode == CODE_FOR_nothing)
4824 return false;
4825
4826 return true;
4827}
4828
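/* A hedged usage sketch (an assumption, not taken from this file): callers
   such as the pattern recognizers can use this predicate before they
   synthesize a shift, e.g.

     if (vect_supportable_shift (RSHIFT_EXPR, itype))
       ... build the pattern statement using RSHIFT_EXPR ...

   where ITYPE stands for the scalar type of the operand being shifted.
   The answer is yes if either the vector-by-scalar or the vector-by-vector
   optab has a handler for the corresponding vector mode.  */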
4829
9dc3f7de
IR
4830/* Function vectorizable_shift.
4831
4832 Check if STMT performs a shift operation that can be vectorized.
4833 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4834 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4835 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4836
4837static bool
355fe088
TS
4838vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4839 gimple **vec_stmt, slp_tree slp_node)
9dc3f7de
IR
4840{
4841 tree vec_dest;
4842 tree scalar_dest;
4843 tree op0, op1 = NULL;
4844 tree vec_oprnd1 = NULL_TREE;
4845 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4846 tree vectype;
4847 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4848 enum tree_code code;
ef4bddc2 4849 machine_mode vec_mode;
9dc3f7de
IR
4850 tree new_temp;
4851 optab optab;
4852 int icode;
ef4bddc2 4853 machine_mode optab_op2_mode;
355fe088 4854 gimple *def_stmt;
9dc3f7de 4855 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4856 int ndts = 2;
355fe088 4857 gimple *new_stmt = NULL;
9dc3f7de 4858 stmt_vec_info prev_stmt_info;
928686b1
RS
4859 poly_uint64 nunits_in;
4860 poly_uint64 nunits_out;
9dc3f7de 4861 tree vectype_out;
cede2577 4862 tree op1_vectype;
9dc3f7de
IR
4863 int ncopies;
4864 int j, i;
6e1aa848
DN
4865 vec<tree> vec_oprnds0 = vNULL;
4866 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
4867 tree vop0, vop1;
4868 unsigned int k;
49eab32e 4869 bool scalar_shift_arg = true;
9dc3f7de 4870 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4871 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
4872
4873 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4874 return false;
4875
66c16fd9
RB
4876 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4877 && ! vec_stmt)
9dc3f7de
IR
4878 return false;
4879
4880 /* Is STMT a vectorizable binary/unary operation? */
4881 if (!is_gimple_assign (stmt))
4882 return false;
4883
4884 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4885 return false;
4886
4887 code = gimple_assign_rhs_code (stmt);
4888
4889 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4890 || code == RROTATE_EXPR))
4891 return false;
4892
4893 scalar_dest = gimple_assign_lhs (stmt);
4894 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 4895 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 4896 {
73fbfcad 4897 if (dump_enabled_p ())
78c60e3d 4898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4899 "bit-precision shifts not supported.\n");
7b7b1813
RG
4900 return false;
4901 }
9dc3f7de
IR
4902
4903 op0 = gimple_assign_rhs1 (stmt);
81c40241 4904 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 4905 {
73fbfcad 4906 if (dump_enabled_p ())
78c60e3d 4907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4908 "use not simple.\n");
9dc3f7de
IR
4909 return false;
4910 }
4911 /* If op0 is an external or constant def use a vector type with
4912 the same size as the output vector type. */
4913 if (!vectype)
4914 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4915 if (vec_stmt)
4916 gcc_assert (vectype);
4917 if (!vectype)
4918 {
73fbfcad 4919 if (dump_enabled_p ())
78c60e3d 4920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4921 "no vectype for scalar type\n");
9dc3f7de
IR
4922 return false;
4923 }
4924
4925 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4926 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 4927 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
4928 return false;
4929
4930 op1 = gimple_assign_rhs2 (stmt);
81c40241 4931 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 4932 {
73fbfcad 4933 if (dump_enabled_p ())
78c60e3d 4934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4935 "use not simple.\n");
9dc3f7de
IR
4936 return false;
4937 }
4938
9dc3f7de
IR
4939 /* Multiple types in SLP are handled by creating the appropriate number of
4940 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4941 case of SLP. */
fce57248 4942 if (slp_node)
9dc3f7de
IR
4943 ncopies = 1;
4944 else
e8f142e2 4945 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
4946
4947 gcc_assert (ncopies >= 1);
4948
4949 /* Determine whether the shift amount is a vector, or scalar. If the
4950 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4951
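  /* Illustrative example (an assumption, not part of the original source):

       for (i = 0; i < n; i++)
         a[i] = b[i] << 5;        <-- constant amount: scalar shift arg

       for (i = 0; i < n; i++)
         a[i] = b[i] << c[i];     <-- amount defined inside the loop

     In the first loop dt[1] is vect_constant_def and the scalar amount can
     be used directly; in the second it is vect_internal_def, so
     SCALAR_SHIFT_ARG is cleared below and the vector/vector optab must be
     used.  */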
dbfa87aa
YR
4952 if ((dt[1] == vect_internal_def
4953 || dt[1] == vect_induction_def)
4954 && !slp_node)
49eab32e
JJ
4955 scalar_shift_arg = false;
4956 else if (dt[1] == vect_constant_def
4957 || dt[1] == vect_external_def
4958 || dt[1] == vect_internal_def)
4959 {
4960 /* In SLP, we need to check whether the shift count is the same
4961 for all statements; in loops, a constant or invariant shift
4962 count is always a scalar shift. */
4963 if (slp_node)
4964 {
355fe088
TS
4965 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4966 gimple *slpstmt;
49eab32e 4967
9771b263 4968 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
4969 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4970 scalar_shift_arg = false;
4971 }
60d393e8
RB
4972
4973 /* If the shift amount is computed by a pattern stmt we cannot
4974 use the scalar amount directly, so give up and use a vector
4975 shift instead. */
4976 if (dt[1] == vect_internal_def)
4977 {
4978 gimple *def = SSA_NAME_DEF_STMT (op1);
4979 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4980 scalar_shift_arg = false;
4981 }
49eab32e
JJ
4982 }
4983 else
4984 {
73fbfcad 4985 if (dump_enabled_p ())
78c60e3d 4986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4987 "operand mode requires invariant argument.\n");
49eab32e
JJ
4988 return false;
4989 }
4990
9dc3f7de 4991 /* Vector shifted by vector. */
49eab32e 4992 if (!scalar_shift_arg)
9dc3f7de
IR
4993 {
4994 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 4995 if (dump_enabled_p ())
78c60e3d 4996 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4997 "vector/vector shift/rotate found.\n");
78c60e3d 4998
aa948027
JJ
4999 if (!op1_vectype)
5000 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5001 if (op1_vectype == NULL_TREE
5002 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5003 {
73fbfcad 5004 if (dump_enabled_p ())
78c60e3d
SS
5005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5006 "unusable type for last operand in"
e645e942 5007 " vector/vector shift/rotate.\n");
cede2577
JJ
5008 return false;
5009 }
9dc3f7de
IR
5010 }
5011 /* See if the machine has a vector shifted by scalar insn and if not
5012 then see if it has a vector shifted by vector insn. */
49eab32e 5013 else
9dc3f7de
IR
5014 {
5015 optab = optab_for_tree_code (code, vectype, optab_scalar);
5016 if (optab
5017 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5018 {
73fbfcad 5019 if (dump_enabled_p ())
78c60e3d 5020 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5021 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5022 }
5023 else
5024 {
5025 optab = optab_for_tree_code (code, vectype, optab_vector);
5026 if (optab
5027 && (optab_handler (optab, TYPE_MODE (vectype))
5028 != CODE_FOR_nothing))
5029 {
49eab32e
JJ
5030 scalar_shift_arg = false;
5031
73fbfcad 5032 if (dump_enabled_p ())
78c60e3d 5033 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5034 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5035
5036 /* Unlike the other binary operators, shifts/rotates have
5037 the rhs being int, instead of the same type as the lhs,
5038 so make sure the scalar is the right type if we are
aa948027 5039 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5040 if (dt[1] == vect_constant_def)
5041 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5042 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5043 TREE_TYPE (op1)))
5044 {
5045 if (slp_node
5046 && TYPE_MODE (TREE_TYPE (vectype))
5047 != TYPE_MODE (TREE_TYPE (op1)))
5048 {
73fbfcad 5049 if (dump_enabled_p ())
78c60e3d
SS
5050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5051 "unusable type for last operand in"
e645e942 5052 " vector/vector shift/rotate.\n");
21c0a521 5053 return false;
aa948027
JJ
5054 }
5055 if (vec_stmt && !slp_node)
5056 {
5057 op1 = fold_convert (TREE_TYPE (vectype), op1);
5058 op1 = vect_init_vector (stmt, op1,
5059 TREE_TYPE (vectype), NULL);
5060 }
5061 }
9dc3f7de
IR
5062 }
5063 }
5064 }
9dc3f7de
IR
5065
5066 /* Supportable by target? */
5067 if (!optab)
5068 {
73fbfcad 5069 if (dump_enabled_p ())
78c60e3d 5070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5071 "no optab.\n");
9dc3f7de
IR
5072 return false;
5073 }
5074 vec_mode = TYPE_MODE (vectype);
5075 icode = (int) optab_handler (optab, vec_mode);
5076 if (icode == CODE_FOR_nothing)
5077 {
73fbfcad 5078 if (dump_enabled_p ())
78c60e3d 5079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5080 "op not supported by target.\n");
9dc3f7de 5081 /* Check only during analysis. */
cf098191 5082 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5083 || (!vec_stmt
5084 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5085 return false;
73fbfcad 5086 if (dump_enabled_p ())
e645e942
TJ
5087 dump_printf_loc (MSG_NOTE, vect_location,
5088 "proceeding using word mode.\n");
9dc3f7de
IR
5089 }
5090
5091 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5092 if (!vec_stmt
5093 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5094 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5095 {
73fbfcad 5096 if (dump_enabled_p ())
78c60e3d 5097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5098 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5099 return false;
5100 }
5101
5102 if (!vec_stmt) /* transformation not required. */
5103 {
5104 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5105 if (dump_enabled_p ())
e645e942
TJ
5106 dump_printf_loc (MSG_NOTE, vect_location,
5107 "=== vectorizable_shift ===\n");
4fc5ebf1 5108 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
9dc3f7de
IR
5109 return true;
5110 }
5111
67b8dbac 5112 /* Transform. */
9dc3f7de 5113
73fbfcad 5114 if (dump_enabled_p ())
78c60e3d 5115 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5116 "transform binary/unary operation.\n");
9dc3f7de
IR
5117
5118 /* Handle def. */
5119 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5120
9dc3f7de
IR
5121 prev_stmt_info = NULL;
5122 for (j = 0; j < ncopies; j++)
5123 {
5124 /* Handle uses. */
5125 if (j == 0)
5126 {
5127 if (scalar_shift_arg)
5128 {
5129 /* Vector shl and shr insn patterns can be defined with scalar
5130 operand 2 (shift operand). In this case, use constant or loop
5131 invariant op1 directly, without extending it to vector mode
5132 first. */
5133 optab_op2_mode = insn_data[icode].operand[2].mode;
5134 if (!VECTOR_MODE_P (optab_op2_mode))
5135 {
73fbfcad 5136 if (dump_enabled_p ())
78c60e3d 5137 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5138 "operand 1 using scalar mode.\n");
9dc3f7de 5139 vec_oprnd1 = op1;
8930f723 5140 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5141 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5142 if (slp_node)
5143 {
5144 /* Store vec_oprnd1 for every vector stmt to be created
5145 for SLP_NODE. We check during the analysis that all
5146 the shift arguments are the same.
5147 TODO: Allow different constants for different vector
5148 stmts generated for an SLP instance. */
5149 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5150 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5151 }
5152 }
5153 }
5154
5155 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5156 (a special case for certain kind of vector shifts); otherwise,
5157 operand 1 should be of a vector type (the usual case). */
5158 if (vec_oprnd1)
5159 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5160 slp_node);
9dc3f7de
IR
5161 else
5162 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5163 slp_node);
9dc3f7de
IR
5164 }
5165 else
5166 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5167
5168 /* Arguments are ready. Create the new vector stmt. */
9771b263 5169 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5170 {
9771b263 5171 vop1 = vec_oprnds1[i];
0d0e4a03 5172 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5173 new_temp = make_ssa_name (vec_dest, new_stmt);
5174 gimple_assign_set_lhs (new_stmt, new_temp);
5175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5176 if (slp_node)
9771b263 5177 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5178 }
5179
5180 if (slp_node)
5181 continue;
5182
5183 if (j == 0)
5184 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5185 else
5186 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5187 prev_stmt_info = vinfo_for_stmt (new_stmt);
5188 }
5189
9771b263
DN
5190 vec_oprnds0.release ();
5191 vec_oprnds1.release ();
9dc3f7de
IR
5192
5193 return true;
5194}
5195
5196
ebfd146a
IR
5197/* Function vectorizable_operation.
5198
16949072
RG
5199 Check if STMT performs a binary, unary or ternary operation that can
5200 be vectorized.
b8698a0f 5201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5204
5205static bool
355fe088
TS
5206vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5207 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 5208{
00f07b86 5209 tree vec_dest;
ebfd146a 5210 tree scalar_dest;
16949072 5211 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5213 tree vectype;
ebfd146a 5214 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5215 enum tree_code code, orig_code;
ef4bddc2 5216 machine_mode vec_mode;
ebfd146a
IR
5217 tree new_temp;
5218 int op_type;
00f07b86 5219 optab optab;
523ba738 5220 bool target_support_p;
355fe088 5221 gimple *def_stmt;
16949072
RG
5222 enum vect_def_type dt[3]
5223 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5224 int ndts = 3;
355fe088 5225 gimple *new_stmt = NULL;
ebfd146a 5226 stmt_vec_info prev_stmt_info;
928686b1
RS
5227 poly_uint64 nunits_in;
5228 poly_uint64 nunits_out;
ebfd146a
IR
5229 tree vectype_out;
5230 int ncopies;
5231 int j, i;
6e1aa848
DN
5232 vec<tree> vec_oprnds0 = vNULL;
5233 vec<tree> vec_oprnds1 = vNULL;
5234 vec<tree> vec_oprnds2 = vNULL;
16949072 5235 tree vop0, vop1, vop2;
a70d6342 5236 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5237 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5238
a70d6342 5239 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5240 return false;
5241
66c16fd9
RB
5242 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5243 && ! vec_stmt)
ebfd146a
IR
5244 return false;
5245
5246 /* Is STMT a vectorizable binary/unary operation? */
5247 if (!is_gimple_assign (stmt))
5248 return false;
5249
5250 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5251 return false;
5252
0eb952ea 5253 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5254
1af4ebf5
MG
5255 /* For pointer addition and subtraction, we should use the normal
5256 plus and minus for the vector operation. */
ebfd146a
IR
5257 if (code == POINTER_PLUS_EXPR)
5258 code = PLUS_EXPR;
1af4ebf5
MG
5259 if (code == POINTER_DIFF_EXPR)
5260 code = MINUS_EXPR;
ebfd146a
IR
5261
5262 /* Support only unary, binary or ternary operations. */
5263 op_type = TREE_CODE_LENGTH (code);
16949072 5264 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5265 {
73fbfcad 5266 if (dump_enabled_p ())
78c60e3d 5267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5268 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5269 op_type);
ebfd146a
IR
5270 return false;
5271 }
5272
b690cc0f
RG
5273 scalar_dest = gimple_assign_lhs (stmt);
5274 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5275
7b7b1813
RG
5276 /* Most operations cannot handle bit-precision types without extra
5277 truncations. */
045c1278 5278 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5279 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5280 /* Exceptions are bitwise binary operations. */
5281 && code != BIT_IOR_EXPR
5282 && code != BIT_XOR_EXPR
5283 && code != BIT_AND_EXPR)
5284 {
73fbfcad 5285 if (dump_enabled_p ())
78c60e3d 5286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5287 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5288 return false;
5289 }
5290
ebfd146a 5291 op0 = gimple_assign_rhs1 (stmt);
81c40241 5292 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5293 {
73fbfcad 5294 if (dump_enabled_p ())
78c60e3d 5295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5296 "use not simple.\n");
ebfd146a
IR
5297 return false;
5298 }
b690cc0f
RG
5299 /* If op0 is an external or constant def use a vector type with
5300 the same size as the output vector type. */
5301 if (!vectype)
b036c6c5
IE
5302 {
5303 /* For a boolean type we cannot determine the vectype from an
5304 invariant value (we don't know whether it is a vector
5305 of booleans or a vector of integers). We use the output
5306 vectype because operations on booleans don't change the
5307 type. */
2568d8a1 5308 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5309 {
2568d8a1 5310 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5311 {
5312 if (dump_enabled_p ())
5313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5314 "not supported operation on bool value.\n");
5315 return false;
5316 }
5317 vectype = vectype_out;
5318 }
5319 else
5320 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5321 }
7d8930a0
IR
5322 if (vec_stmt)
5323 gcc_assert (vectype);
5324 if (!vectype)
5325 {
73fbfcad 5326 if (dump_enabled_p ())
7d8930a0 5327 {
78c60e3d
SS
5328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5329 "no vectype for scalar type ");
5330 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5331 TREE_TYPE (op0));
e645e942 5332 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5333 }
5334
5335 return false;
5336 }
b690cc0f
RG
5337
5338 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5339 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5340 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5341 return false;
ebfd146a 5342
16949072 5343 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5344 {
5345 op1 = gimple_assign_rhs2 (stmt);
81c40241 5346 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5347 {
73fbfcad 5348 if (dump_enabled_p ())
78c60e3d 5349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5350 "use not simple.\n");
ebfd146a
IR
5351 return false;
5352 }
5353 }
16949072
RG
5354 if (op_type == ternary_op)
5355 {
5356 op2 = gimple_assign_rhs3 (stmt);
81c40241 5357 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5358 {
73fbfcad 5359 if (dump_enabled_p ())
78c60e3d 5360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5361 "use not simple.\n");
16949072
RG
5362 return false;
5363 }
5364 }
ebfd146a 5365
b690cc0f 5366 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5367 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5368 case of SLP. */
fce57248 5369 if (slp_node)
b690cc0f
RG
5370 ncopies = 1;
5371 else
e8f142e2 5372 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5373
5374 gcc_assert (ncopies >= 1);
5375
9dc3f7de 5376 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5377 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5378 || code == RROTATE_EXPR)
9dc3f7de 5379 return false;
ebfd146a 5380
ebfd146a 5381 /* Supportable by target? */
00f07b86
RH
5382
5383 vec_mode = TYPE_MODE (vectype);
5384 if (code == MULT_HIGHPART_EXPR)
523ba738 5385 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5386 else
5387 {
5388 optab = optab_for_tree_code (code, vectype, optab_default);
5389 if (!optab)
5deb57cb 5390 {
73fbfcad 5391 if (dump_enabled_p ())
78c60e3d 5392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5393 "no optab.\n");
00f07b86 5394 return false;
5deb57cb 5395 }
523ba738
RS
5396 target_support_p = (optab_handler (optab, vec_mode)
5397 != CODE_FOR_nothing);
5deb57cb
JJ
5398 }
5399
523ba738 5400 if (!target_support_p)
ebfd146a 5401 {
73fbfcad 5402 if (dump_enabled_p ())
78c60e3d 5403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5404 "op not supported by target.\n");
ebfd146a 5405 /* Check only during analysis. */
cf098191 5406 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5407 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5408 return false;
73fbfcad 5409 if (dump_enabled_p ())
e645e942
TJ
5410 dump_printf_loc (MSG_NOTE, vect_location,
5411 "proceeding using word mode.\n");
383d9c83
IR
5412 }
5413
4a00c761 5414 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5415 if (!VECTOR_MODE_P (vec_mode)
5416 && !vec_stmt
ca09abcb 5417 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5418 {
73fbfcad 5419 if (dump_enabled_p ())
78c60e3d 5420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5421 "not worthwhile without SIMD support.\n");
e34842c6 5422 return false;
7d8930a0 5423 }
ebfd146a 5424
ebfd146a
IR
5425 if (!vec_stmt) /* transformation not required. */
5426 {
4a00c761 5427 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5428 if (dump_enabled_p ())
78c60e3d 5429 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5430 "=== vectorizable_operation ===\n");
4fc5ebf1 5431 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5432 return true;
5433 }
5434
67b8dbac 5435 /* Transform. */
ebfd146a 5436
73fbfcad 5437 if (dump_enabled_p ())
78c60e3d 5438 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5439 "transform binary/unary operation.\n");
383d9c83 5440
ebfd146a 5441 /* Handle def. */
00f07b86 5442 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5443
0eb952ea
JJ
5444 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5445 vectors with unsigned elements, but the result is signed. So, we
5446 need to compute the MINUS_EXPR into vectype temporary and
5447 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5448 tree vec_cvt_dest = NULL_TREE;
5449 if (orig_code == POINTER_DIFF_EXPR)
5450 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5451
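  /* Illustrative example (an assumption, not part of the original source):
     for source such as

       ptrdiff_t d = p - q;      where p and q are pointers

     the subtraction is performed on vectors of unsigned pointer-sized
     elements (VECTYPE) and the result is then VIEW_CONVERT_EXPRed into the
     signed result vector type (VECTYPE_OUT) through VEC_CVT_DEST.  */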
ebfd146a
IR
5452 /* In case the vectorization factor (VF) is bigger than the number
5453 of elements that we can fit in a vectype (nunits), we have to generate
5454 more than one vector stmt - i.e - we need to "unroll" the
4a00c761
JJ
5455 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5456 from one copy of the vector stmt to the next, in the field
5457 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5458 stages to find the correct vector defs to be used when vectorizing
5459 stmts that use the defs of the current stmt. The example below
5460 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5461 we need to create 4 vectorized stmts):
5462
5463 before vectorization:
5464 RELATED_STMT VEC_STMT
5465 S1: x = memref - -
5466 S2: z = x + 1 - -
5467
5468 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5469 there):
5470 RELATED_STMT VEC_STMT
5471 VS1_0: vx0 = memref0 VS1_1 -
5472 VS1_1: vx1 = memref1 VS1_2 -
5473 VS1_2: vx2 = memref2 VS1_3 -
5474 VS1_3: vx3 = memref3 - -
5475 S1: x = load - VS1_0
5476 S2: z = x + 1 - -
5477
5478 step2: vectorize stmt S2 (done here):
5479 To vectorize stmt S2 we first need to find the relevant vector
5480 def for the first operand 'x'. This is, as usual, obtained from
5481 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5482 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5483 relevant vector def 'vx0'. Having found 'vx0' we can generate
5484 the vector stmt VS2_0, and as usual, record it in the
5485 STMT_VINFO_VEC_STMT of stmt S2.
5486 When creating the second copy (VS2_1), we obtain the relevant vector
5487 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5488 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5489 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5490 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5491 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5492 chain of stmts and pointers:
5493 RELATED_STMT VEC_STMT
5494 VS1_0: vx0 = memref0 VS1_1 -
5495 VS1_1: vx1 = memref1 VS1_2 -
5496 VS1_2: vx2 = memref2 VS1_3 -
5497 VS1_3: vx3 = memref3 - -
5498 S1: x = load - VS1_0
5499 VS2_0: vz0 = vx0 + v1 VS2_1 -
5500 VS2_1: vz1 = vx1 + v1 VS2_2 -
5501 VS2_2: vz2 = vx2 + v1 VS2_3 -
5502 VS2_3: vz3 = vx3 + v1 - -
5503 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5504
5505 prev_stmt_info = NULL;
5506 for (j = 0; j < ncopies; j++)
5507 {
5508 /* Handle uses. */
5509 if (j == 0)
4a00c761
JJ
5510 {
5511 if (op_type == binary_op || op_type == ternary_op)
5512 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5513 slp_node);
4a00c761
JJ
5514 else
5515 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5516 slp_node);
4a00c761 5517 if (op_type == ternary_op)
c392943c 5518 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
306b0c92 5519 slp_node);
4a00c761 5520 }
ebfd146a 5521 else
4a00c761
JJ
5522 {
5523 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5524 if (op_type == ternary_op)
5525 {
9771b263
DN
5526 tree vec_oprnd = vec_oprnds2.pop ();
5527 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5528 vec_oprnd));
4a00c761
JJ
5529 }
5530 }
5531
5532 /* Arguments are ready. Create the new vector stmt. */
9771b263 5533 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5534 {
4a00c761 5535 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5536 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5537 vop2 = ((op_type == ternary_op)
9771b263 5538 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5539 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5540 new_temp = make_ssa_name (vec_dest, new_stmt);
5541 gimple_assign_set_lhs (new_stmt, new_temp);
5542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
5543 if (vec_cvt_dest)
5544 {
5545 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5546 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5547 new_temp);
5548 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5549 gimple_assign_set_lhs (new_stmt, new_temp);
5550 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5551 }
4a00c761 5552 if (slp_node)
9771b263 5553 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5554 }
5555
4a00c761
JJ
5556 if (slp_node)
5557 continue;
5558
5559 if (j == 0)
5560 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5561 else
5562 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5563 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5564 }
5565
9771b263
DN
5566 vec_oprnds0.release ();
5567 vec_oprnds1.release ();
5568 vec_oprnds2.release ();
ebfd146a 5569
ebfd146a
IR
5570 return true;
5571}
5572
f702e7d4 5573/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
5574
5575static void
f702e7d4 5576ensure_base_align (struct data_reference *dr)
c716e67f
XDL
5577{
5578 if (!dr->aux)
5579 return;
5580
52639a61 5581 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 5582 {
52639a61 5583 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5584
f702e7d4
RS
5585 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5586
428f0c67 5587 if (decl_in_symtab_p (base_decl))
f702e7d4 5588 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
5589 else
5590 {
f702e7d4 5591 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
5592 DECL_USER_ALIGN (base_decl) = 1;
5593 }
52639a61 5594 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
5595 }
5596}
5597
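/* Illustrative note (an assumption, not part of the original source): if the
   vectorizer wants aligned vector accesses to a global such as

     int a[256];                 default 4-byte alignment

   and DR_TARGET_ALIGNMENT is 16 bytes, the code above raises DECL_ALIGN of
   the base declaration (or asks the symbol table to do so) to 128 bits, so
   that aligned vector loads and stores from that base become legitimate.  */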
ebfd146a 5598
44fc7854
BE
5599/* Function get_group_alias_ptr_type.
5600
5601 Return the alias type for the group starting at FIRST_STMT. */
5602
5603static tree
5604get_group_alias_ptr_type (gimple *first_stmt)
5605{
5606 struct data_reference *first_dr, *next_dr;
5607 gimple *next_stmt;
5608
5609 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5610 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5611 while (next_stmt)
5612 {
5613 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5614 if (get_alias_set (DR_REF (first_dr))
5615 != get_alias_set (DR_REF (next_dr)))
5616 {
5617 if (dump_enabled_p ())
5618 dump_printf_loc (MSG_NOTE, vect_location,
5619 "conflicting alias set types.\n");
5620 return ptr_type_node;
5621 }
5622 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5623 }
5624 return reference_alias_ptr_type (DR_REF (first_dr));
5625}
5626
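/* Illustrative note (an assumption, not part of the original source): for an
   interleaved group whose members have different alias sets, e.g. stores to
   the int and float members of

     struct { int i; float f; } *p;

   the loop above detects the conflict and returns ptr_type_node, i.e. an
   alias-everything pointer type, for the whole group.  */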
5627
ebfd146a
IR
5628/* Function vectorizable_store.
5629
b8698a0f
L
5630 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5631 can be vectorized.
5632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5633 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5635
5636static bool
355fe088 5637vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5638 slp_tree slp_node)
ebfd146a 5639{
ebfd146a
IR
5640 tree data_ref;
5641 tree op;
5642 tree vec_oprnd = NULL_TREE;
5643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5644 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5645 tree elem_type;
ebfd146a 5646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5647 struct loop *loop = NULL;
ef4bddc2 5648 machine_mode vec_mode;
ebfd146a
IR
5649 tree dummy;
5650 enum dr_alignment_support alignment_support_scheme;
355fe088 5651 gimple *def_stmt;
ebfd146a
IR
5652 enum vect_def_type dt;
5653 stmt_vec_info prev_stmt_info = NULL;
5654 tree dataref_ptr = NULL_TREE;
74bf76ed 5655 tree dataref_offset = NULL_TREE;
355fe088 5656 gimple *ptr_incr = NULL;
ebfd146a
IR
5657 int ncopies;
5658 int j;
2de001ee
RS
5659 gimple *next_stmt, *first_stmt;
5660 bool grouped_store;
ebfd146a 5661 unsigned int group_size, i;
6e1aa848
DN
5662 vec<tree> oprnds = vNULL;
5663 vec<tree> result_chain = vNULL;
ebfd146a 5664 bool inv_p;
09dfa495 5665 tree offset = NULL_TREE;
6e1aa848 5666 vec<tree> vec_oprnds = vNULL;
ebfd146a 5667 bool slp = (slp_node != NULL);
ebfd146a 5668 unsigned int vec_num;
a70d6342 5669 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5670 vec_info *vinfo = stmt_info->vinfo;
272c6793 5671 tree aggr_type;
134c85ca 5672 gather_scatter_info gs_info;
3bab6342 5673 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5674 gimple *new_stmt;
d9f21f6a 5675 poly_uint64 vf;
2de001ee 5676 vec_load_store_type vls_type;
44fc7854 5677 tree ref_type;
a70d6342 5678
a70d6342 5679 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5680 return false;
5681
66c16fd9
RB
5682 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5683 && ! vec_stmt)
ebfd146a
IR
5684 return false;
5685
5686 /* Is vectorizable store? */
5687
c3a8f964
RS
5688 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
5689 if (is_gimple_assign (stmt))
5690 {
5691 tree scalar_dest = gimple_assign_lhs (stmt);
5692 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5693 && is_pattern_stmt_p (stmt_info))
5694 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5695 if (TREE_CODE (scalar_dest) != ARRAY_REF
5696 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5697 && TREE_CODE (scalar_dest) != INDIRECT_REF
5698 && TREE_CODE (scalar_dest) != COMPONENT_REF
5699 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5700 && TREE_CODE (scalar_dest) != REALPART_EXPR
5701 && TREE_CODE (scalar_dest) != MEM_REF)
5702 return false;
5703 }
5704 else
5705 {
5706 gcall *call = dyn_cast <gcall *> (stmt);
5707 if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
5708 return false;
ebfd146a 5709
c3a8f964
RS
5710 if (slp_node != NULL)
5711 {
5712 if (dump_enabled_p ())
5713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5714 "SLP of masked stores not supported.\n");
5715 return false;
5716 }
5717
5718 ref_type = TREE_TYPE (gimple_call_arg (call, 1));
5719 mask = gimple_call_arg (call, 2);
5720 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
5721 return false;
5722 }
5723
5724 op = vect_get_store_rhs (stmt);
ebfd146a 5725
fce57248
RS
5726 /* Cannot have hybrid store SLP -- that would mean storing to the
5727 same location twice. */
5728 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5729
f4d09712 5730 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 5731 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5732
5733 if (loop_vinfo)
b17dc4d4
RB
5734 {
5735 loop = LOOP_VINFO_LOOP (loop_vinfo);
5736 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5737 }
5738 else
5739 vf = 1;
465c8c19
JJ
5740
5741 /* Multiple types in SLP are handled by creating the appropriate number of
5742 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5743 case of SLP. */
fce57248 5744 if (slp)
465c8c19
JJ
5745 ncopies = 1;
5746 else
e8f142e2 5747 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5748
5749 gcc_assert (ncopies >= 1);
5750
5751 /* FORNOW. This restriction should be relaxed. */
5752 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5753 {
5754 if (dump_enabled_p ())
5755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5756 "multiple types in nested loop.\n");
5757 return false;
5758 }
5759
3133c3b6 5760 if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
f4d09712
KY
5761 return false;
5762
272c6793 5763 elem_type = TREE_TYPE (vectype);
ebfd146a 5764 vec_mode = TYPE_MODE (vectype);
7b7b1813 5765
ebfd146a
IR
5766 if (!STMT_VINFO_DATA_REF (stmt_info))
5767 return false;
5768
2de001ee 5769 vect_memory_access_type memory_access_type;
7e11fc7f 5770 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
5771 &memory_access_type, &gs_info))
5772 return false;
3bab6342 5773
c3a8f964
RS
5774 if (mask)
5775 {
7e11fc7f
RS
5776 if (memory_access_type == VMAT_CONTIGUOUS)
5777 {
5778 if (!VECTOR_MODE_P (vec_mode)
5779 || !can_vec_mask_load_store_p (vec_mode,
5780 TYPE_MODE (mask_vectype), false))
5781 return false;
5782 }
5783 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
c3a8f964
RS
5784 {
5785 if (dump_enabled_p ())
5786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5787 "unsupported access type for masked store.\n");
5788 return false;
5789 }
c3a8f964
RS
5790 }
5791 else
5792 {
5793 /* FORNOW. In some cases can vectorize even if data-type not supported
5794 (e.g. - array initialization with 0). */
5795 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5796 return false;
5797 }
5798
ebfd146a
IR
5799 if (!vec_stmt) /* transformation not required. */
5800 {
2de001ee 5801 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 5802 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5803 /* The SLP costs are calculated during SLP analysis. */
5804 if (!PURE_SLP_STMT (stmt_info))
9ce4345a
RS
5805 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
5806 vls_type, NULL, NULL, NULL);
ebfd146a
IR
5807 return true;
5808 }
2de001ee 5809 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 5810
67b8dbac 5811 /* Transform. */
ebfd146a 5812
f702e7d4 5813 ensure_base_align (dr);
c716e67f 5814
2de001ee 5815 if (memory_access_type == VMAT_GATHER_SCATTER)
3bab6342 5816 {
c3a8f964 5817 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 5818 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
5819 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5820 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5821 edge pe = loop_preheader_edge (loop);
5822 gimple_seq seq;
5823 basic_block new_bb;
5824 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
5825 poly_uint64 scatter_off_nunits
5826 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 5827
4d694b27 5828 if (known_eq (nunits, scatter_off_nunits))
3bab6342 5829 modifier = NONE;
4d694b27 5830 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 5831 {
3bab6342
AT
5832 modifier = WIDEN;
5833
4d694b27
RS
5834 /* Currently gathers and scatters are only supported for
5835 fixed-length vectors. */
5836 unsigned int count = scatter_off_nunits.to_constant ();
5837 vec_perm_builder sel (count, count, 1);
5838 for (i = 0; i < (unsigned int) count; ++i)
5839 sel.quick_push (i | (count / 2));
3bab6342 5840
4d694b27 5841 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
5842 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5843 indices);
3bab6342
AT
5844 gcc_assert (perm_mask != NULL_TREE);
5845 }
4d694b27 5846 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 5847 {
3bab6342
AT
5848 modifier = NARROW;
5849
4d694b27
RS
5850 /* Currently gathers and scatters are only supported for
5851 fixed-length vectors. */
5852 unsigned int count = nunits.to_constant ();
5853 vec_perm_builder sel (count, count, 1);
5854 for (i = 0; i < (unsigned int) count; ++i)
5855 sel.quick_push (i | (count / 2));
3bab6342 5856
4d694b27 5857 vec_perm_indices indices (sel, 2, count);
e3342de4 5858 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
5859 gcc_assert (perm_mask != NULL_TREE);
5860 ncopies *= 2;
5861 }
5862 else
5863 gcc_unreachable ();
5864
134c85ca 5865 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
5866 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5867 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5868 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5869 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5870 scaletype = TREE_VALUE (arglist);
5871
5872 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5873 && TREE_CODE (rettype) == VOID_TYPE);
5874
134c85ca 5875 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
5876 if (!is_gimple_min_invariant (ptr))
5877 {
5878 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5879 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5880 gcc_assert (!new_bb);
5881 }
5882
5883 /* Currently we support only unconditional scatter stores,
5884 so mask should be all ones. */
5885 mask = build_int_cst (masktype, -1);
5886 mask = vect_init_vector (stmt, mask, masktype, NULL);
5887
134c85ca 5888 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
5889
5890 prev_stmt_info = NULL;
5891 for (j = 0; j < ncopies; ++j)
5892 {
5893 if (j == 0)
5894 {
5895 src = vec_oprnd1
c3a8f964 5896 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 5897 op = vec_oprnd0
134c85ca 5898 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
5899 }
5900 else if (modifier != NONE && (j & 1))
5901 {
5902 if (modifier == WIDEN)
5903 {
5904 src = vec_oprnd1
5905 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5906 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5907 stmt, gsi);
5908 }
5909 else if (modifier == NARROW)
5910 {
5911 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5912 stmt, gsi);
5913 op = vec_oprnd0
134c85ca
RS
5914 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5915 vec_oprnd0);
3bab6342
AT
5916 }
5917 else
5918 gcc_unreachable ();
5919 }
5920 else
5921 {
5922 src = vec_oprnd1
5923 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5924 op = vec_oprnd0
134c85ca
RS
5925 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5926 vec_oprnd0);
3bab6342
AT
5927 }
5928
5929 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5930 {
928686b1
RS
5931 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
5932 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 5933 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
5934 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5935 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5937 src = var;
5938 }
5939
5940 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5941 {
928686b1
RS
5942 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
5943 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 5944 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
5945 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5946 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5947 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5948 op = var;
5949 }
5950
5951 new_stmt
134c85ca 5952 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
5953
5954 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5955
5956 if (prev_stmt_info == NULL)
5957 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5958 else
5959 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5960 prev_stmt_info = vinfo_for_stmt (new_stmt);
5961 }
5962 return true;
5963 }
5964
2de001ee 5965 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
0d0293ac 5966 if (grouped_store)
ebfd146a 5967 {
2de001ee 5968 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 5969 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5970 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5971
e14c1050 5972 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5973
5974 /* FORNOW */
a70d6342 5975 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5976
5977 /* We vectorize all the stmts of the interleaving group when we
5978 reach the last stmt in the group. */
e14c1050
IR
5979 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5980 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5981 && !slp)
5982 {
5983 *vec_stmt = NULL;
5984 return true;
5985 }
5986
5987 if (slp)
4b5caab7 5988 {
0d0293ac 5989 grouped_store = false;
4b5caab7
IR
5990 /* VEC_NUM is the number of vect stmts to be created for this
5991 group. */
5992 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5993 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 5994 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 5995 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 5996 op = vect_get_store_rhs (first_stmt);
4b5caab7 5997 }
ebfd146a 5998 else
4b5caab7
IR
5999 /* VEC_NUM is the number of vect stmts to be created for this
6000 group. */
ebfd146a 6001 vec_num = group_size;
44fc7854
BE
6002
6003 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6004 }
b8698a0f 6005 else
ebfd146a
IR
6006 {
6007 first_stmt = stmt;
6008 first_dr = dr;
6009 group_size = vec_num = 1;
44fc7854 6010 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a 6011 }
b8698a0f 6012
73fbfcad 6013 if (dump_enabled_p ())
78c60e3d 6014 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6015 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6016
2de001ee
RS
6017 if (memory_access_type == VMAT_ELEMENTWISE
6018 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6019 {
6020 gimple_stmt_iterator incr_gsi;
6021 bool insert_after;
355fe088 6022 gimple *incr;
f2e2a985
MM
6023 tree offvar;
6024 tree ivstep;
6025 tree running_off;
6026 gimple_seq stmts = NULL;
6027 tree stride_base, stride_step, alias_off;
6028 tree vec_oprnd;
f502d50e 6029 unsigned int g;
4d694b27
RS
6030 /* Checked by get_load_store_type. */
6031 unsigned int const_nunits = nunits.to_constant ();
f2e2a985
MM
6032
6033 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6034
6035 stride_base
6036 = fold_build_pointer_plus
f502d50e 6037 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 6038 size_binop (PLUS_EXPR,
f502d50e 6039 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
44fc7854 6040 convert_to_ptrofftype (DR_INIT (first_dr))));
f502d50e 6041 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
6042
6043 /* For a store with loop-invariant (but other than power-of-2)
6044 stride (i.e. not a grouped access) like so:
6045
6046 for (i = 0; i < n; i += stride)
6047 array[i] = ...;
6048
6049 we generate a new induction variable and new stores from
6050 the components of the (vectorized) rhs:
6051
6052 for (j = 0; ; j += VF*stride)
6053 vectemp = ...;
6054 tmp1 = vectemp[0];
6055 array[j] = tmp1;
6056 tmp2 = vectemp[1];
6057 array[j + stride] = tmp2;
6058 ...
6059 */
6060
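      /* Worked numbers (an assumption, not part of the original source):
	 with a V4SI vectype and a non-SLP strided store, nstores == 4 and
	 lnel == 1, so each copy of the vector statement is decomposed into
	 four BIT_FIELD_REF extracts and four scalar stores, each followed by
	 a pointer bump by STRIDE_STEP.  */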
4d694b27 6061 unsigned nstores = const_nunits;
b17dc4d4 6062 unsigned lnel = 1;
cee62fee 6063 tree ltype = elem_type;
04199738 6064 tree lvectype = vectype;
cee62fee
MM
6065 if (slp)
6066 {
4d694b27
RS
6067 if (group_size < const_nunits
6068 && const_nunits % group_size == 0)
b17dc4d4 6069 {
4d694b27 6070 nstores = const_nunits / group_size;
b17dc4d4
RB
6071 lnel = group_size;
6072 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6073 lvectype = vectype;
6074
6075 /* First check if vec_extract optab doesn't support extraction
6076 of vector elts directly. */
b397965c 6077 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6078 machine_mode vmode;
6079 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6080 || !VECTOR_MODE_P (vmode)
04199738
RB
6081 || (convert_optab_handler (vec_extract_optab,
6082 TYPE_MODE (vectype), vmode)
6083 == CODE_FOR_nothing))
6084 {
6085 /* Try to avoid emitting an extract of vector elements
6086 by performing the extracts using an integer type of the
6087 same size, extracting from a vector of those and then
6088 re-interpreting it as the original vector type if
6089 supported. */
6090 unsigned lsize
6091 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6092 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6093 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6094 /* If we can't construct such a vector fall back to
6095 element extracts from the original vector type and
6096 element size stores. */
4d694b27 6097 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6098 && VECTOR_MODE_P (vmode)
04199738
RB
6099 && (convert_optab_handler (vec_extract_optab,
6100 vmode, elmode)
6101 != CODE_FOR_nothing))
6102 {
4d694b27 6103 nstores = lnunits;
04199738
RB
6104 lnel = group_size;
6105 ltype = build_nonstandard_integer_type (lsize, 1);
6106 lvectype = build_vector_type (ltype, nstores);
6107 }
6108 /* Else fall back to vector extraction anyway.
6109 Fewer stores are more important than avoiding spilling
6110 of the vector we extract from. Compared to the
6111 construction case in vectorizable_load no store-forwarding
6112 issue exists here for reasonable archs. */
6113 }
b17dc4d4 6114 }
4d694b27
RS
6115 else if (group_size >= const_nunits
6116 && group_size % const_nunits == 0)
b17dc4d4
RB
6117 {
6118 nstores = 1;
4d694b27 6119 lnel = const_nunits;
b17dc4d4 6120 ltype = vectype;
04199738 6121 lvectype = vectype;
b17dc4d4 6122 }
cee62fee
MM
6123 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6124 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6125 }
6126
f2e2a985
MM
6127 ivstep = stride_step;
6128 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6129 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6130
6131 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6132
6133 create_iv (stride_base, ivstep, NULL,
6134 loop, &incr_gsi, insert_after,
6135 &offvar, NULL);
6136 incr = gsi_stmt (incr_gsi);
310213d4 6137 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985
MM
6138
6139 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6140 if (stmts)
6141 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6142
6143 prev_stmt_info = NULL;
44fc7854 6144 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6145 next_stmt = first_stmt;
6146 for (g = 0; g < group_size; g++)
f2e2a985 6147 {
f502d50e
MM
6148 running_off = offvar;
6149 if (g)
f2e2a985 6150 {
f502d50e
MM
6151 tree size = TYPE_SIZE_UNIT (ltype);
6152 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6153 size);
f502d50e 6154 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6155 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6156 running_off, pos);
f2e2a985 6157 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6158 running_off = newoff;
f502d50e 6159 }
b17dc4d4
RB
6160 unsigned int group_el = 0;
6161 unsigned HOST_WIDE_INT
6162 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6163 for (j = 0; j < ncopies; j++)
6164 {
c3a8f964 6165 /* We've set op and dt above, from vect_get_store_rhs,
f502d50e
MM
6166 and first_stmt == stmt. */
6167 if (j == 0)
6168 {
6169 if (slp)
6170 {
6171 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6172 slp_node);
f502d50e
MM
6173 vec_oprnd = vec_oprnds[0];
6174 }
6175 else
6176 {
c3a8f964 6177 op = vect_get_store_rhs (next_stmt);
81c40241 6178 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6179 }
6180 }
f2e2a985 6181 else
f502d50e
MM
6182 {
6183 if (slp)
6184 vec_oprnd = vec_oprnds[j];
6185 else
c079cbac 6186 {
81c40241 6187 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
c079cbac
RB
6188 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6189 }
f502d50e 6190 }
04199738
RB
6191 /* Pun the vector to extract from if necessary. */
6192 if (lvectype != vectype)
6193 {
6194 tree tem = make_ssa_name (lvectype);
6195 gimple *pun
6196 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6197 lvectype, vec_oprnd));
6198 vect_finish_stmt_generation (stmt, pun, gsi);
6199 vec_oprnd = tem;
6200 }
f502d50e
MM
6201 for (i = 0; i < nstores; i++)
6202 {
6203 tree newref, newoff;
355fe088 6204 gimple *incr, *assign;
f502d50e
MM
6205 tree size = TYPE_SIZE (ltype);
6206 /* Extract the i'th component. */
6207 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6208 bitsize_int (i), size);
6209 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6210 size, pos);
6211
6212 elem = force_gimple_operand_gsi (gsi, elem, true,
6213 NULL_TREE, true,
6214 GSI_SAME_STMT);
6215
b17dc4d4
RB
6216 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6217 group_el * elsz);
f502d50e 6218 newref = build2 (MEM_REF, ltype,
b17dc4d4 6219 running_off, this_off);
f502d50e
MM
6220
6221 /* And store it to *running_off. */
6222 assign = gimple_build_assign (newref, elem);
6223 vect_finish_stmt_generation (stmt, assign, gsi);
6224
b17dc4d4
RB
6225 group_el += lnel;
6226 if (! slp
6227 || group_el == group_size)
6228 {
6229 newoff = copy_ssa_name (running_off, NULL);
6230 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6231 running_off, stride_step);
6232 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6233
b17dc4d4
RB
6234 running_off = newoff;
6235 group_el = 0;
6236 }
225ce44b
RB
6237 if (g == group_size - 1
6238 && !slp)
f502d50e
MM
6239 {
6240 if (j == 0 && i == 0)
225ce44b
RB
6241 STMT_VINFO_VEC_STMT (stmt_info)
6242 = *vec_stmt = assign;
f502d50e
MM
6243 else
6244 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6245 prev_stmt_info = vinfo_for_stmt (assign);
6246 }
6247 }
f2e2a985 6248 }
f502d50e 6249 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6250 if (slp)
6251 break;
f2e2a985 6252 }
778dd3b6
RB
6253
6254 vec_oprnds.release ();
f2e2a985
MM
6255 return true;
6256 }
6257
8c681247 6258 auto_vec<tree> dr_chain (group_size);
9771b263 6259 oprnds.create (group_size);
ebfd146a 6260
720f5239 6261 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6262 gcc_assert (alignment_support_scheme);
272c6793 6263 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6264 realignment. vect_supportable_dr_alignment always returns either
6265 dr_aligned or dr_unaligned_supported for masked operations. */
6266 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask)
272c6793
RS
6267 || alignment_support_scheme == dr_aligned
6268 || alignment_support_scheme == dr_unaligned_supported);
6269
62da9e14
RS
6270 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6271 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6272 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6273
2de001ee 6274 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
6275 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6276 else
6277 aggr_type = vectype;
ebfd146a 6278
c3a8f964
RS
6279 if (mask)
6280 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6281
ebfd146a
IR
6282 /* In case the vectorization factor (VF) is bigger than the number
6283 of elements that we can fit in a vectype (nunits), we have to generate
6284 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 6285 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6286 vect_get_vec_def_for_copy_stmt. */
6287
0d0293ac 6288 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6289
6290 S1: &base + 2 = x2
6291 S2: &base = x0
6292 S3: &base + 1 = x1
6293 S4: &base + 3 = x3
6294
6295 We create vectorized stores starting from base address (the access of the
6296 first stmt in the chain (S2 in the above example), when the last store stmt
6297 of the chain (S4) is reached:
6298
6299 VS1: &base = vx2
6300 VS2: &base + vec_size*1 = vx0
6301 VS3: &base + vec_size*2 = vx1
6302 VS4: &base + vec_size*3 = vx3
6303
6304 Then permutation statements are generated:
6305
3fcc1b55
JJ
6306 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6307 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6308 ...
b8698a0f 6309
ebfd146a
IR
6310 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6311 (the order of the data-refs in the output of vect_permute_store_chain
6312 corresponds to the order of scalar stmts in the interleaving chain - see
6313 the documentation of vect_permute_store_chain()).
6314
6315 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6316 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6317 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6318 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6319 */
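   /* Source-level illustration (an assumption, not part of the original
      source): an interleaving group like S1..S4 above typically comes from a
      loop such as

	for (i = 0; i < n; i++)
	  {
	    base[4 * i + 2] = x2;     <-- S1
	    base[4 * i + 0] = x0;     <-- S2
	    base[4 * i + 1] = x1;     <-- S3
	    base[4 * i + 3] = x3;     <-- S4
	  }

      where the four stores form one group of size 4 whose first element
      (lowest address) is S2.  */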
6320
6321 prev_stmt_info = NULL;
c3a8f964 6322 tree vec_mask = NULL_TREE;
ebfd146a
IR
6323 for (j = 0; j < ncopies; j++)
6324 {
ebfd146a
IR
6325
6326 if (j == 0)
6327 {
6328 if (slp)
6329 {
6330 /* Get vectorized arguments for SLP_NODE. */
d092494c 6331 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6332 NULL, slp_node);
ebfd146a 6333
9771b263 6334 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6335 }
6336 else
6337 {
b8698a0f
L
6338 /* For interleaved stores we collect vectorized defs for all the
6339 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6340 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6341 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6342
0d0293ac 6343 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6344 OPRNDS are of size 1. */
b8698a0f 6345 next_stmt = first_stmt;
ebfd146a
IR
6346 for (i = 0; i < group_size; i++)
6347 {
b8698a0f
L
6348 /* Since gaps are not supported for interleaved stores,
6349 GROUP_SIZE is the exact number of stmts in the chain.
 6350 Therefore, NEXT_STMT can't be NULL_TREE.  If there is no
 6351 interleaving, GROUP_SIZE is 1, and only one
ebfd146a 6352 iteration of the loop will be executed. */
c3a8f964 6353 op = vect_get_store_rhs (next_stmt);
81c40241 6354 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6355 dr_chain.quick_push (vec_oprnd);
6356 oprnds.quick_push (vec_oprnd);
e14c1050 6357 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6358 }
c3a8f964
RS
6359 if (mask)
6360 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6361 mask_vectype);
ebfd146a
IR
6362 }
6363
 6364 /* We should have caught mismatched types earlier. */
6365 gcc_assert (useless_type_conversion_p (vectype,
6366 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6367 bool simd_lane_access_p
6368 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6369 if (simd_lane_access_p
6370 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6371 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6372 && integer_zerop (DR_OFFSET (first_dr))
6373 && integer_zerop (DR_INIT (first_dr))
6374 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6375 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6376 {
6377 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6378 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6379 inv_p = false;
74bf76ed
JJ
6380 }
6381 else
6382 dataref_ptr
6383 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6384 simd_lane_access_p ? loop : NULL,
09dfa495 6385 offset, &dummy, gsi, &ptr_incr,
74bf76ed 6386 simd_lane_access_p, &inv_p);
a70d6342 6387 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6388 }
b8698a0f 6389 else
ebfd146a 6390 {
b8698a0f
L
6391 /* For interleaved stores we created vectorized defs for all the
6392 defs stored in OPRNDS in the previous iteration (previous copy).
6393 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6394 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6395 next copy.
0d0293ac 6396 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6397 OPRNDS are of size 1. */
6398 for (i = 0; i < group_size; i++)
6399 {
9771b263 6400 op = oprnds[i];
81c40241 6401 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 6402 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
6403 dr_chain[i] = vec_oprnd;
6404 oprnds[i] = vec_oprnd;
ebfd146a 6405 }
c3a8f964
RS
6406 if (mask)
6407 {
6408 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
6409 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
6410 }
74bf76ed
JJ
6411 if (dataref_offset)
6412 dataref_offset
6413 = int_const_binop (PLUS_EXPR, dataref_offset,
6414 TYPE_SIZE_UNIT (aggr_type));
6415 else
6416 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6417 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
6418 }
6419
2de001ee 6420 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 6421 {
272c6793 6422 tree vec_array;
267d3070 6423
272c6793
RS
6424 /* Combine all the vectors into an array. */
6425 vec_array = create_vector_array (vectype, vec_num);
6426 for (i = 0; i < vec_num; i++)
c2d7ab2a 6427 {
9771b263 6428 vec_oprnd = dr_chain[i];
272c6793 6429 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 6430 }
b8698a0f 6431
7e11fc7f
RS
6432 gcall *call;
6433 if (mask)
6434 {
6435 /* Emit:
6436 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6437 VEC_ARRAY). */
6438 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6439 tree alias_ptr = build_int_cst (ref_type, align);
6440 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6441 dataref_ptr, alias_ptr,
6442 vec_mask, vec_array);
6443 }
6444 else
6445 {
6446 /* Emit:
6447 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6448 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6449 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6450 vec_array);
6451 gimple_call_set_lhs (call, data_ref);
6452 }
a844293d
RS
6453 gimple_call_set_nothrow (call, true);
6454 new_stmt = call;
267d3070 6455 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6456 }
6457 else
6458 {
6459 new_stmt = NULL;
0d0293ac 6460 if (grouped_store)
272c6793 6461 {
b6b9227d
JJ
6462 if (j == 0)
6463 result_chain.create (group_size);
272c6793
RS
6464 /* Permute. */
6465 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6466 &result_chain);
6467 }
c2d7ab2a 6468
272c6793
RS
6469 next_stmt = first_stmt;
6470 for (i = 0; i < vec_num; i++)
6471 {
644ffefd 6472 unsigned align, misalign;
272c6793
RS
6473
6474 if (i > 0)
6475 /* Bump the vector pointer. */
6476 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6477 stmt, NULL_TREE);
6478
6479 if (slp)
9771b263 6480 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
6481 else if (grouped_store)
6482 /* For grouped stores vectorized defs are interleaved in
272c6793 6483 vect_permute_store_chain(). */
9771b263 6484 vec_oprnd = result_chain[i];
272c6793 6485
f702e7d4 6486 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 6487 if (aligned_access_p (first_dr))
644ffefd 6488 misalign = 0;
272c6793
RS
6489 else if (DR_MISALIGNMENT (first_dr) == -1)
6490 {
25f68d90 6491 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 6492 misalign = 0;
272c6793
RS
6493 }
6494 else
c3a8f964 6495 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
6496 if (dataref_offset == NULL_TREE
6497 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
6498 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6499 misalign);
c2d7ab2a 6500
62da9e14 6501 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6502 {
6503 tree perm_mask = perm_mask_for_reverse (vectype);
6504 tree perm_dest
c3a8f964 6505 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 6506 vectype);
b731b390 6507 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
6508
6509 /* Generate the permute statement. */
355fe088 6510 gimple *perm_stmt
0d0e4a03
JJ
6511 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6512 vec_oprnd, perm_mask);
09dfa495
BM
6513 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6514
6515 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6516 vec_oprnd = new_temp;
6517 }
6518
272c6793 6519 /* Arguments are ready. Create the new vector stmt. */
c3a8f964
RS
6520 if (mask)
6521 {
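	      /* The alignment encoded in the IFN_MASK_STORE pointer argument
		 is the largest power of two that divides both the computed
		 alignment and the misalignment, i.e. the alignment the
		 access is actually guaranteed to have.  */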
6522 align = least_bit_hwi (misalign | align);
6523 tree ptr = build_int_cst (ref_type, align);
6524 gcall *call
6525 = gimple_build_call_internal (IFN_MASK_STORE, 4,
6526 dataref_ptr, ptr,
6527 vec_mask, vec_oprnd);
6528 gimple_call_set_nothrow (call, true);
6529 new_stmt = call;
6530 }
6531 else
6532 {
6533 data_ref = fold_build2 (MEM_REF, vectype,
6534 dataref_ptr,
6535 dataref_offset
6536 ? dataref_offset
6537 : build_int_cst (ref_type, 0));
6538 if (aligned_access_p (first_dr))
6539 ;
6540 else if (DR_MISALIGNMENT (first_dr) == -1)
6541 TREE_TYPE (data_ref)
6542 = build_aligned_type (TREE_TYPE (data_ref),
6543 align * BITS_PER_UNIT);
6544 else
6545 TREE_TYPE (data_ref)
6546 = build_aligned_type (TREE_TYPE (data_ref),
6547 TYPE_ALIGN (elem_type));
6548 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6549 }
272c6793 6550 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6551
6552 if (slp)
6553 continue;
6554
e14c1050 6555 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
6556 if (!next_stmt)
6557 break;
6558 }
ebfd146a 6559 }
1da0876c
RS
6560 if (!slp)
6561 {
6562 if (j == 0)
6563 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6564 else
6565 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6566 prev_stmt_info = vinfo_for_stmt (new_stmt);
6567 }
ebfd146a
IR
6568 }
6569
9771b263
DN
6570 oprnds.release ();
6571 result_chain.release ();
6572 vec_oprnds.release ();
ebfd146a
IR
6573
6574 return true;
6575}
6576
557be5a8
AL
6577/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6578 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 6579 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 6580 vect_gen_perm_mask_checked. */
a1e53f3f 6581
3fcc1b55 6582tree
4aae3cb3 6583vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 6584{
b00cb3bf 6585 tree mask_type;
a1e53f3f 6586
0ecc2b7d
RS
6587 poly_uint64 nunits = sel.length ();
6588 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
6589
6590 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 6591 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
6592}
6593
7ac7e286 6594/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 6595 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
6596
6597tree
4aae3cb3 6598vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 6599{
7ac7e286 6600 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
6601 return vect_gen_perm_mask_any (vectype, sel);
6602}
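/* For illustration: with a four-element vector type, a reversal is described
   by SEL = { 3, 2, 1, 0 }; the returned VECTOR_CST (with ssizetype elements)
   can then be used directly as the third operand of a VEC_PERM_EXPR, as done
   in permute_vec_elements below.  */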
6603
aec7ae7d
JJ
 6604/* Given vector variables X and Y that were generated for the scalar
6605 STMT, generate instructions to permute the vector elements of X and Y
6606 using permutation mask MASK_VEC, insert them at *GSI and return the
6607 permuted vector variable. */
a1e53f3f
L
6608
6609static tree
355fe088 6610permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6611 gimple_stmt_iterator *gsi)
a1e53f3f
L
6612{
6613 tree vectype = TREE_TYPE (x);
aec7ae7d 6614 tree perm_dest, data_ref;
355fe088 6615 gimple *perm_stmt;
a1e53f3f 6616
7ad429a4
RS
6617 tree scalar_dest = gimple_get_lhs (stmt);
6618 if (TREE_CODE (scalar_dest) == SSA_NAME)
6619 perm_dest = vect_create_destination_var (scalar_dest, vectype);
6620 else
6621 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 6622 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
6623
6624 /* Generate the permute statement. */
0d0e4a03 6625 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
6626 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6627
6628 return data_ref;
6629}
6630
6b916b36
RB
6631/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 6632 inserting them on the loop's preheader edge.  Returns true if we
 6633 were successful in doing so (and thus STMT can then be moved),
6634 otherwise returns false. */
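/* For illustration: if vectorizable_load wants to hoist an invariant load

     _1 = x_4(D) + 16;
     _2 = MEM[(int *) _1];

   out of the loop, the definition of _1 has to be moved to the preheader
   first; this function performs that check and the movement itself.  */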
6635
6636static bool
355fe088 6637hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
6638{
6639 ssa_op_iter i;
6640 tree op;
6641 bool any = false;
6642
6643 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6644 {
355fe088 6645 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6646 if (!gimple_nop_p (def_stmt)
6647 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6648 {
6649 /* Make sure we don't need to recurse. While we could do
 6650 so in simple cases, when there are more complex use webs
6651 we don't have an easy way to preserve stmt order to fulfil
6652 dependencies within them. */
6653 tree op2;
6654 ssa_op_iter i2;
d1417442
JJ
6655 if (gimple_code (def_stmt) == GIMPLE_PHI)
6656 return false;
6b916b36
RB
6657 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6658 {
355fe088 6659 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
6660 if (!gimple_nop_p (def_stmt2)
6661 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6662 return false;
6663 }
6664 any = true;
6665 }
6666 }
6667
6668 if (!any)
6669 return true;
6670
6671 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6672 {
355fe088 6673 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6674 if (!gimple_nop_p (def_stmt)
6675 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6676 {
6677 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6678 gsi_remove (&gsi, false);
6679 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6680 }
6681 }
6682
6683 return true;
6684}
6685
ebfd146a
IR
6686/* vectorizable_load.
6687
b8698a0f
L
 6688 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6689 can be vectorized.
6690 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6691 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6692 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6693
6694static bool
355fe088 6695vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6696 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
6697{
6698 tree scalar_dest;
6699 tree vec_dest = NULL;
6700 tree data_ref = NULL;
6701 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6702 stmt_vec_info prev_stmt_info;
ebfd146a 6703 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6704 struct loop *loop = NULL;
ebfd146a 6705 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6706 bool nested_in_vect_loop = false;
c716e67f 6707 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6708 tree elem_type;
ebfd146a 6709 tree new_temp;
ef4bddc2 6710 machine_mode mode;
355fe088 6711 gimple *new_stmt = NULL;
ebfd146a
IR
6712 tree dummy;
6713 enum dr_alignment_support alignment_support_scheme;
6714 tree dataref_ptr = NULL_TREE;
74bf76ed 6715 tree dataref_offset = NULL_TREE;
355fe088 6716 gimple *ptr_incr = NULL;
ebfd146a 6717 int ncopies;
4d694b27
RS
6718 int i, j;
6719 unsigned int group_size;
6720 poly_uint64 group_gap_adj;
ebfd146a
IR
6721 tree msq = NULL_TREE, lsq;
6722 tree offset = NULL_TREE;
356bbc4c 6723 tree byte_offset = NULL_TREE;
ebfd146a 6724 tree realignment_token = NULL_TREE;
538dd0b7 6725 gphi *phi = NULL;
6e1aa848 6726 vec<tree> dr_chain = vNULL;
0d0293ac 6727 bool grouped_load = false;
355fe088 6728 gimple *first_stmt;
4f0a0218 6729 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
6730 bool inv_p;
6731 bool compute_in_loop = false;
6732 struct loop *at_loop;
6733 int vec_num;
6734 bool slp = (slp_node != NULL);
6735 bool slp_perm = false;
a70d6342 6736 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 6737 poly_uint64 vf;
272c6793 6738 tree aggr_type;
134c85ca 6739 gather_scatter_info gs_info;
310213d4 6740 vec_info *vinfo = stmt_info->vinfo;
44fc7854 6741 tree ref_type;
a70d6342 6742
465c8c19
JJ
6743 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6744 return false;
6745
66c16fd9
RB
6746 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6747 && ! vec_stmt)
465c8c19
JJ
6748 return false;
6749
c3a8f964
RS
6750 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6751 if (is_gimple_assign (stmt))
6752 {
6753 scalar_dest = gimple_assign_lhs (stmt);
6754 if (TREE_CODE (scalar_dest) != SSA_NAME)
6755 return false;
465c8c19 6756
c3a8f964
RS
6757 tree_code code = gimple_assign_rhs_code (stmt);
6758 if (code != ARRAY_REF
6759 && code != BIT_FIELD_REF
6760 && code != INDIRECT_REF
6761 && code != COMPONENT_REF
6762 && code != IMAGPART_EXPR
6763 && code != REALPART_EXPR
6764 && code != MEM_REF
6765 && TREE_CODE_CLASS (code) != tcc_declaration)
6766 return false;
6767 }
6768 else
6769 {
6770 gcall *call = dyn_cast <gcall *> (stmt);
6771 if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
6772 return false;
465c8c19 6773
c3a8f964
RS
6774 scalar_dest = gimple_call_lhs (call);
6775 if (!scalar_dest)
6776 return false;
6777
6778 if (slp_node != NULL)
6779 {
6780 if (dump_enabled_p ())
6781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6782 "SLP of masked loads not supported.\n");
6783 return false;
6784 }
6785
6786 mask = gimple_call_arg (call, 2);
6787 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
6788 return false;
6789 }
465c8c19
JJ
6790
6791 if (!STMT_VINFO_DATA_REF (stmt_info))
6792 return false;
6793
6794 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 6795 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 6796
a70d6342
IR
6797 if (loop_vinfo)
6798 {
6799 loop = LOOP_VINFO_LOOP (loop_vinfo);
6800 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6801 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6802 }
6803 else
3533e503 6804 vf = 1;
ebfd146a
IR
6805
6806 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6807 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6808 case of SLP. */
fce57248 6809 if (slp)
ebfd146a
IR
6810 ncopies = 1;
6811 else
e8f142e2 6812 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
6813
6814 gcc_assert (ncopies >= 1);
6815
6816 /* FORNOW. This restriction should be relaxed. */
6817 if (nested_in_vect_loop && ncopies > 1)
6818 {
73fbfcad 6819 if (dump_enabled_p ())
78c60e3d 6820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6821 "multiple types in nested loop.\n");
ebfd146a
IR
6822 return false;
6823 }
6824
f2556b68
RB
6825 /* Invalidate assumptions made by dependence analysis when vectorization
6826 on the unrolled body effectively re-orders stmts. */
6827 if (ncopies > 1
6828 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6829 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6830 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6831 {
6832 if (dump_enabled_p ())
6833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6834 "cannot perform implicit CSE when unrolling "
6835 "with negative dependence distance\n");
6836 return false;
6837 }
6838
7b7b1813 6839 elem_type = TREE_TYPE (vectype);
947131ba 6840 mode = TYPE_MODE (vectype);
ebfd146a
IR
6841
6842 /* FORNOW. In some cases can vectorize even if data-type not supported
6843 (e.g. - data copies). */
947131ba 6844 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6845 {
73fbfcad 6846 if (dump_enabled_p ())
78c60e3d 6847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6848 "Aligned load, but unsupported type.\n");
ebfd146a
IR
6849 return false;
6850 }
6851
ebfd146a 6852 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6853 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6854 {
0d0293ac 6855 grouped_load = true;
ebfd146a 6856 /* FORNOW */
2de001ee
RS
6857 gcc_assert (!nested_in_vect_loop);
6858 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6859
e14c1050 6860 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 6861 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 6862
b1af7da6
RB
6863 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6864 slp_perm = true;
6865
f2556b68
RB
6866 /* Invalidate assumptions made by dependence analysis when vectorization
6867 on the unrolled body effectively re-orders stmts. */
6868 if (!PURE_SLP_STMT (stmt_info)
6869 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6870 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6871 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6872 {
6873 if (dump_enabled_p ())
6874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6875 "cannot perform implicit CSE when performing "
6876 "group loads with negative dependence distance\n");
6877 return false;
6878 }
96bb56b2
RB
6879
6880 /* Similarly when the stmt is a load that is both part of a SLP
6881 instance and a loop vectorized stmt via the same-dr mechanism
6882 we have to give up. */
6883 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6884 && (STMT_SLP_TYPE (stmt_info)
6885 != STMT_SLP_TYPE (vinfo_for_stmt
6886 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6887 {
6888 if (dump_enabled_p ())
6889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6890 "conflicting SLP types for CSEd load\n");
6891 return false;
6892 }
ebfd146a
IR
6893 }
6894
2de001ee 6895 vect_memory_access_type memory_access_type;
7e11fc7f 6896 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
6897 &memory_access_type, &gs_info))
6898 return false;
a1e53f3f 6899
c3a8f964
RS
6900 if (mask)
6901 {
6902 if (memory_access_type == VMAT_CONTIGUOUS)
6903 {
7e11fc7f
RS
6904 machine_mode vec_mode = TYPE_MODE (vectype);
6905 if (!VECTOR_MODE_P (vec_mode)
6906 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
6907 TYPE_MODE (mask_vectype), true))
6908 return false;
6909 }
6910 else if (memory_access_type == VMAT_GATHER_SCATTER)
6911 {
6912 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6913 tree masktype
6914 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
6915 if (TREE_CODE (masktype) == INTEGER_TYPE)
6916 {
6917 if (dump_enabled_p ())
6918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6919 "masked gather with integer mask not"
6920 " supported.");
6921 return false;
6922 }
6923 }
7e11fc7f 6924 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
c3a8f964
RS
6925 {
6926 if (dump_enabled_p ())
6927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6928 "unsupported access type for masked load.\n");
6929 return false;
6930 }
6931 }
6932
ebfd146a
IR
6933 if (!vec_stmt) /* transformation not required. */
6934 {
2de001ee
RS
6935 if (!slp)
6936 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
ebfd146a 6937 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c
RB
6938 /* The SLP costs are calculated during SLP analysis. */
6939 if (!PURE_SLP_STMT (stmt_info))
2de001ee 6940 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2e8ab70c 6941 NULL, NULL, NULL);
ebfd146a
IR
6942 return true;
6943 }
6944
2de001ee
RS
6945 if (!slp)
6946 gcc_assert (memory_access_type
6947 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6948
73fbfcad 6949 if (dump_enabled_p ())
78c60e3d 6950 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6951 "transform load. ncopies = %d\n", ncopies);
ebfd146a 6952
67b8dbac 6953 /* Transform. */
ebfd146a 6954
f702e7d4 6955 ensure_base_align (dr);
c716e67f 6956
2de001ee 6957 if (memory_access_type == VMAT_GATHER_SCATTER)
aec7ae7d 6958 {
c3a8f964 6959 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
aec7ae7d
JJ
6960 return true;
6961 }
2de001ee
RS
6962
6963 if (memory_access_type == VMAT_ELEMENTWISE
6964 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
6965 {
6966 gimple_stmt_iterator incr_gsi;
6967 bool insert_after;
355fe088 6968 gimple *incr;
7d75abc8 6969 tree offvar;
7d75abc8
MM
6970 tree ivstep;
6971 tree running_off;
9771b263 6972 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 6973 gimple_seq stmts = NULL;
14ac6aa2 6974 tree stride_base, stride_step, alias_off;
4d694b27
RS
6975 /* Checked by get_load_store_type. */
6976 unsigned int const_nunits = nunits.to_constant ();
14ac6aa2
RB
6977
6978 gcc_assert (!nested_in_vect_loop);
7d75abc8 6979
f502d50e 6980 if (slp && grouped_load)
44fc7854
BE
6981 {
6982 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6983 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6984 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6985 ref_type = get_group_alias_ptr_type (first_stmt);
6986 }
ab313a8c 6987 else
44fc7854
BE
6988 {
6989 first_stmt = stmt;
6990 first_dr = dr;
6991 group_size = 1;
6992 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6993 }
ab313a8c 6994
14ac6aa2
RB
6995 stride_base
6996 = fold_build_pointer_plus
ab313a8c 6997 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 6998 size_binop (PLUS_EXPR,
ab313a8c
RB
6999 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7000 convert_to_ptrofftype (DR_INIT (first_dr))));
7001 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7002
7003 /* For a load with loop-invariant (but other than power-of-2)
7004 stride (i.e. not a grouped access) like so:
7005
7006 for (i = 0; i < n; i += stride)
7007 ... = array[i];
7008
7009 we generate a new induction variable and new accesses to
7010 form a new vector (or vectors, depending on ncopies):
7011
7012 for (j = 0; ; j += VF*stride)
7013 tmp1 = array[j];
7014 tmp2 = array[j + stride];
7015 ...
7016 vectemp = {tmp1, tmp2, ...}
7017 */
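      /* For example (an illustrative sketch, assuming a V4SI vectype and a
	 constant stride of 3):

	   for (i = 0; i < n; i += 3)
	     ... = array[i];

	 becomes

	   for (j = 0; ; j += 4 * 3)
	     tmp1 = array[j];
	     tmp2 = array[j + 3];
	     tmp3 = array[j + 6];
	     tmp4 = array[j + 9];
	     vectemp = {tmp1, tmp2, tmp3, tmp4};  */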
7018
ab313a8c
RB
7019 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7020 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7021
7022 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7023
ab313a8c 7024 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7d75abc8
MM
7025 loop, &incr_gsi, insert_after,
7026 &offvar, NULL);
7027 incr = gsi_stmt (incr_gsi);
310213d4 7028 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7029
ab313a8c
RB
7030 stride_step = force_gimple_operand (unshare_expr (stride_step),
7031 &stmts, true, NULL_TREE);
7d75abc8
MM
7032 if (stmts)
7033 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7034
7035 prev_stmt_info = NULL;
7036 running_off = offvar;
44fc7854 7037 alias_off = build_int_cst (ref_type, 0);
4d694b27 7038 int nloads = const_nunits;
e09b4c37 7039 int lnel = 1;
7b5fc413 7040 tree ltype = TREE_TYPE (vectype);
ea60dd34 7041 tree lvectype = vectype;
b266b968 7042 auto_vec<tree> dr_chain;
2de001ee 7043 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7044 {
4d694b27 7045 if (group_size < const_nunits)
e09b4c37 7046 {
ff03930a
JJ
7047 /* First check if vec_init optab supports construction from
7048 vector elts directly. */
b397965c 7049 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7050 machine_mode vmode;
7051 if (mode_for_vector (elmode, group_size).exists (&vmode)
7052 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7053 && (convert_optab_handler (vec_init_optab,
7054 TYPE_MODE (vectype), vmode)
7055 != CODE_FOR_nothing))
ea60dd34 7056 {
4d694b27 7057 nloads = const_nunits / group_size;
ea60dd34 7058 lnel = group_size;
ff03930a
JJ
7059 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7060 }
7061 else
7062 {
7063 /* Otherwise avoid emitting a constructor of vector elements
7064 by performing the loads using an integer type of the same
7065 size, constructing a vector of those and then
7066 re-interpreting it as the original vector type.
7067 This avoids a huge runtime penalty due to the general
7068 inability to perform store forwarding from smaller stores
7069 to a larger load. */
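		  /* For example (an illustrative sketch): with a V4SI
		     vectype and group_size == 2, each group of two ints is
		     loaded as a single 64-bit integer (lsize == 64), the two
		     such loads are combined into a two-element vector
		     constructor, and the result is view-converted back to
		     V4SI.  */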
7070 unsigned lsize
7071 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7072 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7073 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7074 /* If we can't construct such a vector fall back to
7075 element loads of the original vector type. */
4d694b27 7076 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7077 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7078 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7079 != CODE_FOR_nothing))
7080 {
4d694b27 7081 nloads = lnunits;
ff03930a
JJ
7082 lnel = group_size;
7083 ltype = build_nonstandard_integer_type (lsize, 1);
7084 lvectype = build_vector_type (ltype, nloads);
7085 }
ea60dd34 7086 }
e09b4c37 7087 }
2de001ee 7088 else
e09b4c37 7089 {
ea60dd34 7090 nloads = 1;
4d694b27 7091 lnel = const_nunits;
e09b4c37 7092 ltype = vectype;
e09b4c37 7093 }
2de001ee
RS
7094 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7095 }
7096 if (slp)
7097 {
66c16fd9
RB
7098 /* For SLP permutation support we need to load the whole group,
7099 not only the number of vector stmts the permutation result
7100 fits in. */
b266b968 7101 if (slp_perm)
66c16fd9 7102 {
d9f21f6a
RS
7103 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7104 variable VF. */
7105 unsigned int const_vf = vf.to_constant ();
4d694b27 7106 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7107 dr_chain.create (ncopies);
7108 }
7109 else
7110 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7111 }
4d694b27 7112 unsigned int group_el = 0;
e09b4c37
RB
7113 unsigned HOST_WIDE_INT
7114 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7115 for (j = 0; j < ncopies; j++)
7116 {
7b5fc413 7117 if (nloads > 1)
e09b4c37
RB
7118 vec_alloc (v, nloads);
7119 for (i = 0; i < nloads; i++)
7b5fc413 7120 {
e09b4c37
RB
7121 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7122 group_el * elsz);
7123 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7124 build2 (MEM_REF, ltype,
7125 running_off, this_off));
7126 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7127 if (nloads > 1)
7128 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7129 gimple_assign_lhs (new_stmt));
7130
7131 group_el += lnel;
7132 if (! slp
7133 || group_el == group_size)
7b5fc413 7134 {
e09b4c37
RB
7135 tree newoff = copy_ssa_name (running_off);
7136 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7137 running_off, stride_step);
7b5fc413
RB
7138 vect_finish_stmt_generation (stmt, incr, gsi);
7139
7140 running_off = newoff;
e09b4c37 7141 group_el = 0;
7b5fc413 7142 }
7b5fc413 7143 }
e09b4c37 7144 if (nloads > 1)
7d75abc8 7145 {
ea60dd34
RB
7146 tree vec_inv = build_constructor (lvectype, v);
7147 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7148 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7149 if (lvectype != vectype)
7150 {
7151 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7152 VIEW_CONVERT_EXPR,
7153 build1 (VIEW_CONVERT_EXPR,
7154 vectype, new_temp));
7155 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7156 }
7d75abc8
MM
7157 }
7158
7b5fc413 7159 if (slp)
b266b968 7160 {
b266b968
RB
7161 if (slp_perm)
7162 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7163 else
7164 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7165 }
7d75abc8 7166 else
225ce44b
RB
7167 {
7168 if (j == 0)
7169 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7170 else
7171 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7172 prev_stmt_info = vinfo_for_stmt (new_stmt);
7173 }
7d75abc8 7174 }
b266b968 7175 if (slp_perm)
29afecdf
RB
7176 {
7177 unsigned n_perms;
7178 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7179 slp_node_instance, false, &n_perms);
7180 }
7d75abc8
MM
7181 return true;
7182 }
aec7ae7d 7183
0d0293ac 7184 if (grouped_load)
ebfd146a 7185 {
e14c1050 7186 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7187 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7188 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7189 without permutation. */
7190 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7191 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7192 /* For BB vectorization always use the first stmt to base
7193 the data ref pointer on. */
7194 if (bb_vinfo)
7195 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7196
ebfd146a 7197 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7198 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7199 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7200 ??? But we can only do so if there is exactly one
7201 as we have no way to get at the rest. Leave the CSE
7202 opportunity alone.
7203 ??? With the group load eventually participating
7204 in multiple different permutations (having multiple
7205 slp nodes which refer to the same group) the CSE
 7206 would even produce wrong code.  See PR56270. */
7207 && !slp)
ebfd146a
IR
7208 {
7209 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7210 return true;
7211 }
7212 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7213 group_gap_adj = 0;
ebfd146a
IR
7214
7215 /* VEC_NUM is the number of vect stmts to be created for this group. */
7216 if (slp)
7217 {
0d0293ac 7218 grouped_load = false;
91ff1504
RB
7219 /* For SLP permutation support we need to load the whole group,
7220 not only the number of vector stmts the permutation result
7221 fits in. */
7222 if (slp_perm)
b267968e 7223 {
d9f21f6a
RS
7224 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7225 variable VF. */
7226 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7227 unsigned int const_nunits = nunits.to_constant ();
7228 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7229 group_gap_adj = vf * group_size - nunits * vec_num;
7230 }
91ff1504 7231 else
b267968e
RB
7232 {
7233 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7234 group_gap_adj
7235 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7236 }
a70d6342 7237 }
ebfd146a 7238 else
9b999e8c 7239 vec_num = group_size;
44fc7854
BE
7240
7241 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7242 }
7243 else
7244 {
7245 first_stmt = stmt;
7246 first_dr = dr;
7247 group_size = vec_num = 1;
9b999e8c 7248 group_gap_adj = 0;
44fc7854 7249 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7250 }
7251
720f5239 7252 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7253 gcc_assert (alignment_support_scheme);
272c6793
RS
7254 /* Targets with load-lane instructions must not require explicit
7255 realignment. */
2de001ee 7256 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
272c6793
RS
7257 || alignment_support_scheme == dr_aligned
7258 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7259
7260 /* In case the vectorization factor (VF) is bigger than the number
7261 of elements that we can fit in a vectype (nunits), we have to generate
 7262 more than one vector stmt - i.e., we need to "unroll" the
ff802fa1 7263 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7264 from one copy of the vector stmt to the next, in the field
ff802fa1 7265 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7266 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7267 stmts that use the defs of the current stmt. The example below
7268 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7269 need to create 4 vectorized stmts):
ebfd146a
IR
7270
7271 before vectorization:
7272 RELATED_STMT VEC_STMT
7273 S1: x = memref - -
7274 S2: z = x + 1 - -
7275
7276 step 1: vectorize stmt S1:
7277 We first create the vector stmt VS1_0, and, as usual, record a
7278 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7279 Next, we create the vector stmt VS1_1, and record a pointer to
7280 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7281 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7282 stmts and pointers:
7283 RELATED_STMT VEC_STMT
7284 VS1_0: vx0 = memref0 VS1_1 -
7285 VS1_1: vx1 = memref1 VS1_2 -
7286 VS1_2: vx2 = memref2 VS1_3 -
7287 VS1_3: vx3 = memref3 - -
7288 S1: x = load - VS1_0
7289 S2: z = x + 1 - -
7290
b8698a0f
L
7291 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7292 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
7293 stmt S2. */
7294
0d0293ac 7295 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7296
7297 S1: x2 = &base + 2
7298 S2: x0 = &base
7299 S3: x1 = &base + 1
7300 S4: x3 = &base + 3
7301
b8698a0f 7302 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7303 starting from the access of the first stmt of the chain:
7304
7305 VS1: vx0 = &base
7306 VS2: vx1 = &base + vec_size*1
7307 VS3: vx3 = &base + vec_size*2
7308 VS4: vx4 = &base + vec_size*3
7309
7310 Then permutation statements are generated:
7311
e2c83630
RH
7312 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7313 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7314 ...
7315
7316 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7317 (the order of the data-refs in the output of vect_permute_load_chain
7318 corresponds to the order of scalar stmts in the interleaving chain - see
7319 the documentation of vect_permute_load_chain()).
7320 The generation of permutation stmts and recording them in
0d0293ac 7321 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7322
b8698a0f 7323 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
7324 permutation stmts above are created for every copy. The result vector
7325 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7326 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
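   /* For example (an illustrative sketch, assuming V4SI vectors and a group
      of two interleaved loads x = a[2*i], y = a[2*i+1]): with
      vx0 = {a0, a1, a2, a3} and vx1 = {a4, a5, a6, a7} loaded from memory,
      vect_permute_load_chain produces

	vx5 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  (= {a0, a2, a4, a6})
	vx6 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  (= {a1, a3, a5, a7})

      which provide the vector values of x and y respectively.  */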
ebfd146a
IR
7327
7328 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7329 on a target that supports unaligned accesses (dr_unaligned_supported)
7330 we generate the following code:
7331 p = initial_addr;
7332 indx = 0;
7333 loop {
7334 p = p + indx * vectype_size;
7335 vec_dest = *(p);
7336 indx = indx + 1;
7337 }
7338
7339 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7340 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
7341 then generate the following code, in which the data in each iteration is
7342 obtained by two vector loads, one from the previous iteration, and one
7343 from the current iteration:
7344 p1 = initial_addr;
7345 msq_init = *(floor(p1))
7346 p2 = initial_addr + VS - 1;
7347 realignment_token = call target_builtin;
7348 indx = 0;
7349 loop {
7350 p2 = p2 + indx * vectype_size
7351 lsq = *(floor(p2))
7352 vec_dest = realign_load (msq, lsq, realignment_token)
7353 indx = indx + 1;
7354 msq = lsq;
7355 } */
7356
7357 /* If the misalignment remains the same throughout the execution of the
7358 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7359 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
7360 This can only occur when vectorizing memory accesses in the inner-loop
7361 nested within an outer-loop that is being vectorized. */
7362
d1e4b493 7363 if (nested_in_vect_loop
cf098191
RS
7364 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7365 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
7366 {
7367 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7368 compute_in_loop = true;
7369 }
7370
7371 if ((alignment_support_scheme == dr_explicit_realign_optimized
7372 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7373 && !compute_in_loop)
ebfd146a
IR
7374 {
7375 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7376 alignment_support_scheme, NULL_TREE,
7377 &at_loop);
7378 if (alignment_support_scheme == dr_explicit_realign_optimized)
7379 {
538dd0b7 7380 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
7381 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7382 size_one_node);
ebfd146a
IR
7383 }
7384 }
7385 else
7386 at_loop = loop;
7387
62da9e14 7388 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
7389 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7390
2de001ee 7391 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
7392 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7393 else
7394 aggr_type = vectype;
7395
c3a8f964 7396 tree vec_mask = NULL_TREE;
ebfd146a 7397 prev_stmt_info = NULL;
4d694b27 7398 poly_uint64 group_elt = 0;
ebfd146a 7399 for (j = 0; j < ncopies; j++)
b8698a0f 7400 {
272c6793 7401 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7402 if (j == 0)
74bf76ed
JJ
7403 {
7404 bool simd_lane_access_p
7405 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7406 if (simd_lane_access_p
7407 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7408 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7409 && integer_zerop (DR_OFFSET (first_dr))
7410 && integer_zerop (DR_INIT (first_dr))
7411 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7412 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
7413 && (alignment_support_scheme == dr_aligned
7414 || alignment_support_scheme == dr_unaligned_supported))
7415 {
7416 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7417 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7418 inv_p = false;
74bf76ed 7419 }
4f0a0218
RB
7420 else if (first_stmt_for_drptr
7421 && first_stmt != first_stmt_for_drptr)
7422 {
7423 dataref_ptr
7424 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7425 at_loop, offset, &dummy, gsi,
7426 &ptr_incr, simd_lane_access_p,
7427 &inv_p, byte_offset);
7428 /* Adjust the pointer by the difference to first_stmt. */
7429 data_reference_p ptrdr
7430 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7431 tree diff = fold_convert (sizetype,
7432 size_binop (MINUS_EXPR,
7433 DR_INIT (first_dr),
7434 DR_INIT (ptrdr)));
7435 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7436 stmt, diff);
7437 }
74bf76ed
JJ
7438 else
7439 dataref_ptr
7440 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7441 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
7442 simd_lane_access_p, &inv_p,
7443 byte_offset);
c3a8f964
RS
7444 if (mask)
7445 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7446 mask_vectype);
74bf76ed 7447 }
ebfd146a 7448 else
c3a8f964
RS
7449 {
7450 if (dataref_offset)
7451 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7452 TYPE_SIZE_UNIT (aggr_type));
7453 else
7454 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7455 TYPE_SIZE_UNIT (aggr_type));
7456 if (mask)
7457 {
7458 gimple *def_stmt;
7459 vect_def_type dt;
7460 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
7461 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
7462 }
7463 }
ebfd146a 7464
0d0293ac 7465 if (grouped_load || slp_perm)
9771b263 7466 dr_chain.create (vec_num);
5ce1ee7f 7467
2de001ee 7468 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7469 {
272c6793
RS
7470 tree vec_array;
7471
7472 vec_array = create_vector_array (vectype, vec_num);
7473
7e11fc7f
RS
7474 gcall *call;
7475 if (mask)
7476 {
7477 /* Emit:
7478 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
7479 VEC_MASK). */
7480 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7481 tree alias_ptr = build_int_cst (ref_type, align);
7482 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
7483 dataref_ptr, alias_ptr,
7484 vec_mask);
7485 }
7486 else
7487 {
7488 /* Emit:
7489 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7490 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7491 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7492 }
a844293d
RS
7493 gimple_call_set_lhs (call, vec_array);
7494 gimple_call_set_nothrow (call, true);
7495 new_stmt = call;
272c6793 7496 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7497
272c6793
RS
7498 /* Extract each vector into an SSA_NAME. */
7499 for (i = 0; i < vec_num; i++)
ebfd146a 7500 {
272c6793
RS
7501 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7502 vec_array, i);
9771b263 7503 dr_chain.quick_push (new_temp);
272c6793
RS
7504 }
7505
7506 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7507 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
7508 }
7509 else
7510 {
7511 for (i = 0; i < vec_num; i++)
7512 {
7513 if (i > 0)
7514 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7515 stmt, NULL_TREE);
7516
7517 /* 2. Create the vector-load in the loop. */
7518 switch (alignment_support_scheme)
7519 {
7520 case dr_aligned:
7521 case dr_unaligned_supported:
be1ac4ec 7522 {
644ffefd
MJ
7523 unsigned int align, misalign;
7524
f702e7d4 7525 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
7526 if (alignment_support_scheme == dr_aligned)
7527 {
7528 gcc_assert (aligned_access_p (first_dr));
644ffefd 7529 misalign = 0;
272c6793
RS
7530 }
7531 else if (DR_MISALIGNMENT (first_dr) == -1)
7532 {
25f68d90 7533 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7534 misalign = 0;
272c6793
RS
7535 }
7536 else
c3a8f964 7537 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7538 if (dataref_offset == NULL_TREE
7539 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7540 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7541 align, misalign);
c3a8f964
RS
7542
7543 if (mask)
7544 {
7545 align = least_bit_hwi (misalign | align);
7546 tree ptr = build_int_cst (ref_type, align);
7547 gcall *call
7548 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
7549 dataref_ptr, ptr,
7550 vec_mask);
7551 gimple_call_set_nothrow (call, true);
7552 new_stmt = call;
7553 data_ref = NULL_TREE;
7554 }
7555 else
7556 {
7557 data_ref
7558 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7559 dataref_offset
7560 ? dataref_offset
7561 : build_int_cst (ref_type, 0));
7562 if (alignment_support_scheme == dr_aligned)
7563 ;
7564 else if (DR_MISALIGNMENT (first_dr) == -1)
7565 TREE_TYPE (data_ref)
7566 = build_aligned_type (TREE_TYPE (data_ref),
7567 align * BITS_PER_UNIT);
7568 else
7569 TREE_TYPE (data_ref)
7570 = build_aligned_type (TREE_TYPE (data_ref),
7571 TYPE_ALIGN (elem_type));
7572 }
272c6793 7573 break;
be1ac4ec 7574 }
272c6793 7575 case dr_explicit_realign:
267d3070 7576 {
272c6793 7577 tree ptr, bump;
272c6793 7578
d88981fc 7579 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
7580
7581 if (compute_in_loop)
7582 msq = vect_setup_realignment (first_stmt, gsi,
7583 &realignment_token,
7584 dr_explicit_realign,
7585 dataref_ptr, NULL);
7586
aed93b23
RB
7587 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7588 ptr = copy_ssa_name (dataref_ptr);
7589 else
7590 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 7591 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
7592 new_stmt = gimple_build_assign
7593 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
7594 build_int_cst
7595 (TREE_TYPE (dataref_ptr),
f702e7d4 7596 -(HOST_WIDE_INT) align));
272c6793
RS
7597 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7598 data_ref
7599 = build2 (MEM_REF, vectype, ptr,
44fc7854 7600 build_int_cst (ref_type, 0));
272c6793
RS
7601 vec_dest = vect_create_destination_var (scalar_dest,
7602 vectype);
7603 new_stmt = gimple_build_assign (vec_dest, data_ref);
7604 new_temp = make_ssa_name (vec_dest, new_stmt);
7605 gimple_assign_set_lhs (new_stmt, new_temp);
7606 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7607 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7608 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7609 msq = new_temp;
7610
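	      /* Bump the pointer to the last byte of the unaligned vector
		 (nunits * elem_size - 1 bytes) so that masking it down to
		 the target alignment below yields the address of the
		 second, following aligned load (the "p2" of the scheme
		 described above).  */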
d88981fc 7611 bump = size_binop (MULT_EXPR, vs,
7b7b1813 7612 TYPE_SIZE_UNIT (elem_type));
d88981fc 7613 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 7614 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
7615 new_stmt = gimple_build_assign
7616 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 7617 build_int_cst
f702e7d4 7618 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 7619 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
7620 gimple_assign_set_lhs (new_stmt, ptr);
7621 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7622 data_ref
7623 = build2 (MEM_REF, vectype, ptr,
44fc7854 7624 build_int_cst (ref_type, 0));
272c6793 7625 break;
267d3070 7626 }
272c6793 7627 case dr_explicit_realign_optimized:
f702e7d4
RS
7628 {
7629 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7630 new_temp = copy_ssa_name (dataref_ptr);
7631 else
7632 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7633 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7634 new_stmt = gimple_build_assign
7635 (new_temp, BIT_AND_EXPR, dataref_ptr,
7636 build_int_cst (TREE_TYPE (dataref_ptr),
7637 -(HOST_WIDE_INT) align));
7638 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7639 data_ref
7640 = build2 (MEM_REF, vectype, new_temp,
7641 build_int_cst (ref_type, 0));
7642 break;
7643 }
272c6793
RS
7644 default:
7645 gcc_unreachable ();
7646 }
ebfd146a 7647 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
7648 /* DATA_REF is null if we've already built the statement. */
7649 if (data_ref)
7650 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a 7651 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 7652 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
7653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7654
272c6793
RS
7655 /* 3. Handle explicit realignment if necessary/supported.
7656 Create in loop:
7657 vec_dest = realign_load (msq, lsq, realignment_token) */
7658 if (alignment_support_scheme == dr_explicit_realign_optimized
7659 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 7660 {
272c6793
RS
7661 lsq = gimple_assign_lhs (new_stmt);
7662 if (!realignment_token)
7663 realignment_token = dataref_ptr;
7664 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
7665 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7666 msq, lsq, realignment_token);
272c6793
RS
7667 new_temp = make_ssa_name (vec_dest, new_stmt);
7668 gimple_assign_set_lhs (new_stmt, new_temp);
7669 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7670
7671 if (alignment_support_scheme == dr_explicit_realign_optimized)
7672 {
7673 gcc_assert (phi);
7674 if (i == vec_num - 1 && j == ncopies - 1)
7675 add_phi_arg (phi, lsq,
7676 loop_latch_edge (containing_loop),
9e227d60 7677 UNKNOWN_LOCATION);
272c6793
RS
7678 msq = lsq;
7679 }
ebfd146a 7680 }
ebfd146a 7681
59fd17e3
RB
7682 /* 4. Handle invariant-load. */
7683 if (inv_p && !bb_vinfo)
7684 {
59fd17e3 7685 gcc_assert (!grouped_load);
d1417442
JJ
7686 /* If we have versioned for aliasing or the loop doesn't
7687 have any data dependencies that would preclude this,
7688 then we are sure this is a loop invariant load and
7689 thus we can insert it on the preheader edge. */
7690 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7691 && !nested_in_vect_loop
6b916b36 7692 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
7693 {
7694 if (dump_enabled_p ())
7695 {
7696 dump_printf_loc (MSG_NOTE, vect_location,
7697 "hoisting out of the vectorized "
7698 "loop: ");
7699 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 7700 }
b731b390 7701 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
7702 gsi_insert_on_edge_immediate
7703 (loop_preheader_edge (loop),
7704 gimple_build_assign (tem,
7705 unshare_expr
7706 (gimple_assign_rhs1 (stmt))));
7707 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
7708 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7709 set_vinfo_for_stmt (new_stmt,
7710 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
7711 }
7712 else
7713 {
7714 gimple_stmt_iterator gsi2 = *gsi;
7715 gsi_next (&gsi2);
7716 new_temp = vect_init_vector (stmt, scalar_dest,
7717 vectype, &gsi2);
34cd48e5 7718 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 7719 }
59fd17e3
RB
7720 }
7721
62da9e14 7722 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 7723 {
aec7ae7d
JJ
7724 tree perm_mask = perm_mask_for_reverse (vectype);
7725 new_temp = permute_vec_elements (new_temp, new_temp,
7726 perm_mask, stmt, gsi);
ebfd146a
IR
7727 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7728 }
267d3070 7729
272c6793 7730 /* Collect vector loads and later create their permutation in
0d0293ac
MM
7731 vect_transform_grouped_load (). */
7732 if (grouped_load || slp_perm)
9771b263 7733 dr_chain.quick_push (new_temp);
267d3070 7734
272c6793
RS
7735 /* Store vector loads in the corresponding SLP_NODE. */
7736 if (slp && !slp_perm)
9771b263 7737 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
7738
 7739 /* With SLP permutation we load the gaps as well; without
 7740 it we need to skip the gaps after we manage to fully load
7741 all elements. group_gap_adj is GROUP_SIZE here. */
7742 group_elt += nunits;
d9f21f6a
RS
7743 if (maybe_ne (group_gap_adj, 0U)
7744 && !slp_perm
7745 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 7746 {
d9f21f6a
RS
7747 poly_wide_int bump_val
7748 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7749 * group_gap_adj);
8e6cdc90 7750 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
7751 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7752 stmt, bump);
7753 group_elt = 0;
7754 }
272c6793 7755 }
9b999e8c
RB
7756 /* Bump the vector pointer to account for a gap or for excess
7757 elements loaded for a permuted SLP load. */
d9f21f6a 7758 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 7759 {
d9f21f6a
RS
7760 poly_wide_int bump_val
7761 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7762 * group_gap_adj);
8e6cdc90 7763 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
7764 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7765 stmt, bump);
7766 }
ebfd146a
IR
7767 }
7768
7769 if (slp && !slp_perm)
7770 continue;
7771
7772 if (slp_perm)
7773 {
29afecdf 7774 unsigned n_perms;
01d8bf07 7775 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
7776 slp_node_instance, false,
7777 &n_perms))
ebfd146a 7778 {
9771b263 7779 dr_chain.release ();
ebfd146a
IR
7780 return false;
7781 }
7782 }
7783 else
7784 {
0d0293ac 7785 if (grouped_load)
ebfd146a 7786 {
2de001ee 7787 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 7788 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 7789 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
7790 }
7791 else
7792 {
7793 if (j == 0)
7794 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7795 else
7796 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7797 prev_stmt_info = vinfo_for_stmt (new_stmt);
7798 }
7799 }
9771b263 7800 dr_chain.release ();
ebfd146a
IR
7801 }
7802
ebfd146a
IR
7803 return true;
7804}
7805
7806/* Function vect_is_simple_cond.
b8698a0f 7807
ebfd146a
IR
7808 Input:
7809 LOOP - the loop that is being vectorized.
7810 COND - Condition that is checked for simple use.
7811
e9e1d143
RG
7812 Output:
7813 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 7814 *DTS - The def types for the arguments of the comparison.
e9e1d143 7815
ebfd146a
IR
7816 Returns whether a COND can be vectorized. Checks whether
 7817 condition operands are supportable using vect_is_simple_use. */
7818
87aab9b2 7819static bool
4fc5ebf1 7820vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
7821 tree *comp_vectype, enum vect_def_type *dts,
7822 tree vectype)
ebfd146a
IR
7823{
7824 tree lhs, rhs;
e9e1d143 7825 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 7826
a414c77f
IE
7827 /* Mask case. */
7828 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 7829 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
7830 {
7831 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7832 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 7833 &dts[0], comp_vectype)
a414c77f
IE
7834 || !*comp_vectype
7835 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7836 return false;
7837 return true;
7838 }
7839
ebfd146a
IR
7840 if (!COMPARISON_CLASS_P (cond))
7841 return false;
7842
7843 lhs = TREE_OPERAND (cond, 0);
7844 rhs = TREE_OPERAND (cond, 1);
7845
7846 if (TREE_CODE (lhs) == SSA_NAME)
7847 {
355fe088 7848 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 7849 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
7850 return false;
7851 }
4fc5ebf1
JG
7852 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7853 || TREE_CODE (lhs) == FIXED_CST)
7854 dts[0] = vect_constant_def;
7855 else
ebfd146a
IR
7856 return false;
7857
7858 if (TREE_CODE (rhs) == SSA_NAME)
7859 {
355fe088 7860 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 7861 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
7862 return false;
7863 }
4fc5ebf1
JG
7864 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7865 || TREE_CODE (rhs) == FIXED_CST)
7866 dts[1] = vect_constant_def;
7867 else
ebfd146a
IR
7868 return false;
7869
28b33016 7870 if (vectype1 && vectype2
928686b1
RS
7871 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
7872 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
7873 return false;
7874
e9e1d143 7875 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
7876 /* Invariant comparison. */
7877 if (! *comp_vectype)
7878 {
7879 tree scalar_type = TREE_TYPE (lhs);
7880 /* If we can widen the comparison to match vectype do so. */
7881 if (INTEGRAL_TYPE_P (scalar_type)
7882 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7883 TYPE_SIZE (TREE_TYPE (vectype))))
7884 scalar_type = build_nonstandard_integer_type
7885 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7886 TYPE_UNSIGNED (scalar_type));
7887 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7888 }
7889
ebfd146a
IR
7890 return true;
7891}
7892
7893/* vectorizable_condition.
7894
b8698a0f
L
7895 Check if STMT is a conditional modify expression that can be vectorized.
7896 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7897 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
7898 at GSI.
7899
7900 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7901 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 7902 else clause if it is 2).
ebfd146a
IR
7903
7904 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7905
4bbe8262 7906bool
355fe088
TS
7907vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7908 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 7909 slp_tree slp_node)
ebfd146a
IR
7910{
7911 tree scalar_dest = NULL_TREE;
7912 tree vec_dest = NULL_TREE;
01216d27
JJ
7913 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7914 tree then_clause, else_clause;
ebfd146a 7915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 7916 tree comp_vectype = NULL_TREE;
ff802fa1
IR
7917 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7918 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 7919 tree vec_compare;
ebfd146a
IR
7920 tree new_temp;
7921 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
7922 enum vect_def_type dts[4]
7923 = {vect_unknown_def_type, vect_unknown_def_type,
7924 vect_unknown_def_type, vect_unknown_def_type};
7925 int ndts = 4;
f7e531cf 7926 int ncopies;
01216d27 7927 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 7928 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
7929 int i, j;
7930 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
7931 vec<tree> vec_oprnds0 = vNULL;
7932 vec<tree> vec_oprnds1 = vNULL;
7933 vec<tree> vec_oprnds2 = vNULL;
7934 vec<tree> vec_oprnds3 = vNULL;
74946978 7935 tree vec_cmp_type;
a414c77f 7936 bool masked = false;
b8698a0f 7937
f7e531cf
IR
7938 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7939 return false;
7940
af29617a
AH
7941 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7942 {
7943 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7944 return false;
ebfd146a 7945
af29617a
AH
7946 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7947 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7948 && reduc_def))
7949 return false;
ebfd146a 7950
af29617a
AH
7951 /* FORNOW: not yet supported. */
7952 if (STMT_VINFO_LIVE_P (stmt_info))
7953 {
7954 if (dump_enabled_p ())
7955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7956 "value used after loop.\n");
7957 return false;
7958 }
ebfd146a
IR
7959 }
7960
7961 /* Is vectorizable conditional operation? */
7962 if (!is_gimple_assign (stmt))
7963 return false;
7964
7965 code = gimple_assign_rhs_code (stmt);
7966
7967 if (code != COND_EXPR)
7968 return false;
7969
465c8c19 7970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 7971 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 7972
fce57248 7973 if (slp_node)
465c8c19
JJ
7974 ncopies = 1;
7975 else
e8f142e2 7976 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
7977
7978 gcc_assert (ncopies >= 1);
7979 if (reduc_index && ncopies > 1)
7980 return false; /* FORNOW */
7981
4e71066d
RG
7982 cond_expr = gimple_assign_rhs1 (stmt);
7983 then_clause = gimple_assign_rhs2 (stmt);
7984 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 7985
4fc5ebf1 7986 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8da4c8d8 7987 &comp_vectype, &dts[0], vectype)
e9e1d143 7988 || !comp_vectype)
ebfd146a
IR
7989 return false;
7990
81c40241 7991 gimple *def_stmt;
4fc5ebf1 7992 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
7993 &vectype1))
7994 return false;
4fc5ebf1 7995 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 7996 &vectype2))
ebfd146a 7997 return false;
2947d3b2
IE
7998
7999 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8000 return false;
8001
8002 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8003 return false;
8004
28b33016
IE
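 /* A "masked" COND_EXPR has a boolean SSA_NAME as its condition rather
 than an embedded comparison; VEC_CMP_TYPE is the boolean vector type
 that will hold the vector mask feeding the VEC_COND_EXPR.  */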
8005 masked = !COMPARISON_CLASS_P (cond_expr);
8006 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8007
74946978
MP
8008 if (vec_cmp_type == NULL_TREE)
8009 return false;
784fb9b3 8010
01216d27
JJ
8011 cond_code = TREE_CODE (cond_expr);
8012 if (!masked)
8013 {
8014 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8015 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8016 }
8017
8018 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8019 {
8020 /* Boolean values may have another representation in vectors
8021 and therefore we prefer bit operations over comparison for
8022 them (which also works for scalar masks). We store opcodes
8023 to use in bitop1 and bitop2. Statement is vectorized as
8024 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8025 depending on bitop1 and bitop2 arity. */
8026 switch (cond_code)
8027 {
8028 case GT_EXPR:
8029 bitop1 = BIT_NOT_EXPR;
8030 bitop2 = BIT_AND_EXPR;
8031 break;
8032 case GE_EXPR:
8033 bitop1 = BIT_NOT_EXPR;
8034 bitop2 = BIT_IOR_EXPR;
8035 break;
8036 case LT_EXPR:
8037 bitop1 = BIT_NOT_EXPR;
8038 bitop2 = BIT_AND_EXPR;
8039 std::swap (cond_expr0, cond_expr1);
8040 break;
8041 case LE_EXPR:
8042 bitop1 = BIT_NOT_EXPR;
8043 bitop2 = BIT_IOR_EXPR;
8044 std::swap (cond_expr0, cond_expr1);
8045 break;
8046 case NE_EXPR:
8047 bitop1 = BIT_XOR_EXPR;
8048 break;
8049 case EQ_EXPR:
8050 bitop1 = BIT_XOR_EXPR;
8051 bitop2 = BIT_NOT_EXPR;
8052 break;
8053 default:
8054 return false;
8055 }
8056 cond_code = SSA_NAME;
8057 }
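 /* For instance, with mask operands a > b becomes a & ~b (bitop1 =
 BIT_NOT_EXPR applied to the rhs, bitop2 = BIT_AND_EXPR), and a == b
 becomes ~(a ^ b) (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR).  */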
8058
b8698a0f 8059 if (!vec_stmt)
ebfd146a
IR
8060 {
8061 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
01216d27
JJ
8062 if (bitop1 != NOP_EXPR)
8063 {
8064 machine_mode mode = TYPE_MODE (comp_vectype);
8065 optab optab;
8066
8067 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8068 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8069 return false;
8070
8071 if (bitop2 != NOP_EXPR)
8072 {
8073 optab = optab_for_tree_code (bitop2, comp_vectype,
8074 optab_default);
8075 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8076 return false;
8077 }
8078 }
4fc5ebf1
JG
8079 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8080 cond_code))
8081 {
8082 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8083 return true;
8084 }
8085 return false;
ebfd146a
IR
8086 }
8087
f7e531cf
IR
8088 /* Transform. */
8089
8090 if (!slp_node)
8091 {
9771b263
DN
8092 vec_oprnds0.create (1);
8093 vec_oprnds1.create (1);
8094 vec_oprnds2.create (1);
8095 vec_oprnds3.create (1);
f7e531cf 8096 }
ebfd146a
IR
8097
8098 /* Handle def. */
8099 scalar_dest = gimple_assign_lhs (stmt);
8100 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8101
8102 /* Handle cond expr. */
a855b1b1
MM
8103 for (j = 0; j < ncopies; j++)
8104 {
538dd0b7 8105 gassign *new_stmt = NULL;
a855b1b1
MM
8106 if (j == 0)
8107 {
f7e531cf
IR
8108 if (slp_node)
8109 {
00f96dc9
TS
8110 auto_vec<tree, 4> ops;
8111 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8112
a414c77f 8113 if (masked)
01216d27 8114 ops.safe_push (cond_expr);
a414c77f
IE
8115 else
8116 {
01216d27
JJ
8117 ops.safe_push (cond_expr0);
8118 ops.safe_push (cond_expr1);
a414c77f 8119 }
9771b263
DN
8120 ops.safe_push (then_clause);
8121 ops.safe_push (else_clause);
306b0c92 8122 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8123 vec_oprnds3 = vec_defs.pop ();
8124 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8125 if (!masked)
8126 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8127 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8128 }
8129 else
8130 {
355fe088 8131 gimple *gtemp;
a414c77f
IE
8132 if (masked)
8133 {
8134 vec_cond_lhs
8135 = vect_get_vec_def_for_operand (cond_expr, stmt,
8136 comp_vectype);
8137 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8138 &gtemp, &dts[0]);
8139 }
8140 else
8141 {
01216d27
JJ
8142 vec_cond_lhs
8143 = vect_get_vec_def_for_operand (cond_expr0,
8144 stmt, comp_vectype);
8145 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8146
8147 vec_cond_rhs
8148 = vect_get_vec_def_for_operand (cond_expr1,
8149 stmt, comp_vectype);
8150 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8151 }
f7e531cf
IR
8152 if (reduc_index == 1)
8153 vec_then_clause = reduc_def;
8154 else
8155 {
8156 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8157 stmt);
8158 vect_is_simple_use (then_clause, loop_vinfo,
8159 &gtemp, &dts[2]);
f7e531cf
IR
8160 }
8161 if (reduc_index == 2)
8162 vec_else_clause = reduc_def;
8163 else
8164 {
8165 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8166 stmt);
8167 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8168 }
a855b1b1
MM
8169 }
8170 }
8171 else
8172 {
a414c77f
IE
8173 vec_cond_lhs
8174 = vect_get_vec_def_for_stmt_copy (dts[0],
8175 vec_oprnds0.pop ());
8176 if (!masked)
8177 vec_cond_rhs
8178 = vect_get_vec_def_for_stmt_copy (dts[1],
8179 vec_oprnds1.pop ());
8180
a855b1b1 8181 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8182 vec_oprnds2.pop ());
a855b1b1 8183 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8184 vec_oprnds3.pop ());
f7e531cf
IR
8185 }
8186
8187 if (!slp_node)
8188 {
9771b263 8189 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8190 if (!masked)
8191 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8192 vec_oprnds2.quick_push (vec_then_clause);
8193 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8194 }
8195
9dc3f7de 8196 /* Arguments are ready. Create the new vector stmt. */
9771b263 8197 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8198 {
9771b263
DN
8199 vec_then_clause = vec_oprnds2[i];
8200 vec_else_clause = vec_oprnds3[i];
a855b1b1 8201
a414c77f
IE
8202 if (masked)
8203 vec_compare = vec_cond_lhs;
8204 else
8205 {
8206 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8207 if (bitop1 == NOP_EXPR)
8208 vec_compare = build2 (cond_code, vec_cmp_type,
8209 vec_cond_lhs, vec_cond_rhs);
8210 else
8211 {
8212 new_temp = make_ssa_name (vec_cmp_type);
8213 if (bitop1 == BIT_NOT_EXPR)
8214 new_stmt = gimple_build_assign (new_temp, bitop1,
8215 vec_cond_rhs);
8216 else
8217 new_stmt
8218 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8219 vec_cond_rhs);
8220 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8221 if (bitop2 == NOP_EXPR)
8222 vec_compare = new_temp;
8223 else if (bitop2 == BIT_NOT_EXPR)
8224 {
8225 /* Instead of doing ~x ? y : z do x ? z : y. */
8226 vec_compare = new_temp;
8227 std::swap (vec_then_clause, vec_else_clause);
8228 }
8229 else
8230 {
8231 vec_compare = make_ssa_name (vec_cmp_type);
8232 new_stmt
8233 = gimple_build_assign (vec_compare, bitop2,
8234 vec_cond_lhs, new_temp);
8235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8236 }
8237 }
a414c77f 8238 }
5958f9e2
JJ
8239 new_temp = make_ssa_name (vec_dest);
8240 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8241 vec_compare, vec_then_clause,
8242 vec_else_clause);
f7e531cf
IR
8243 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8244 if (slp_node)
9771b263 8245 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
8246 }
8247
8248 if (slp_node)
8249 continue;
8250
8251 if (j == 0)
8252 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8253 else
8254 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8255
8256 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 8257 }
b8698a0f 8258
9771b263
DN
8259 vec_oprnds0.release ();
8260 vec_oprnds1.release ();
8261 vec_oprnds2.release ();
8262 vec_oprnds3.release ();
f7e531cf 8263
ebfd146a
IR
8264 return true;
8265}
8266
42fd8198
IE
8267/* vectorizable_comparison.
8268
8269 Check if STMT is a comparison expression that can be vectorized.
8270 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8271 comparison, put it in VEC_STMT, and insert it at GSI.
8272
8273 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8274
fce57248 8275static bool
42fd8198
IE
8276vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8277 gimple **vec_stmt, tree reduc_def,
8278 slp_tree slp_node)
8279{
8280 tree lhs, rhs1, rhs2;
8281 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8282 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8283 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8284 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8285 tree new_temp;
8286 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8287 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 8288 int ndts = 2;
928686b1 8289 poly_uint64 nunits;
42fd8198 8290 int ncopies;
49e76ff1 8291 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
8292 stmt_vec_info prev_stmt_info = NULL;
8293 int i, j;
8294 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8295 vec<tree> vec_oprnds0 = vNULL;
8296 vec<tree> vec_oprnds1 = vNULL;
8297 gimple *def_stmt;
8298 tree mask_type;
8299 tree mask;
8300
c245362b
IE
8301 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8302 return false;
8303
30480bcd 8304 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
8305 return false;
8306
8307 mask_type = vectype;
8308 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8309
fce57248 8310 if (slp_node)
42fd8198
IE
8311 ncopies = 1;
8312 else
e8f142e2 8313 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
8314
8315 gcc_assert (ncopies >= 1);
42fd8198
IE
8316 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8317 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8318 && reduc_def))
8319 return false;
8320
8321 if (STMT_VINFO_LIVE_P (stmt_info))
8322 {
8323 if (dump_enabled_p ())
8324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8325 "value used after loop.\n");
8326 return false;
8327 }
8328
8329 if (!is_gimple_assign (stmt))
8330 return false;
8331
8332 code = gimple_assign_rhs_code (stmt);
8333
8334 if (TREE_CODE_CLASS (code) != tcc_comparison)
8335 return false;
8336
8337 rhs1 = gimple_assign_rhs1 (stmt);
8338 rhs2 = gimple_assign_rhs2 (stmt);
8339
8340 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8341 &dts[0], &vectype1))
8342 return false;
8343
8344 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8345 &dts[1], &vectype2))
8346 return false;
8347
8348 if (vectype1 && vectype2
928686b1
RS
8349 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8350 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
8351 return false;
8352
8353 vectype = vectype1 ? vectype1 : vectype2;
8354
8355 /* Invariant comparison. */
8356 if (!vectype)
8357 {
69a9a66f 8358 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 8359 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
8360 return false;
8361 }
928686b1 8362 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
8363 return false;
8364
49e76ff1
IE
8365 /* Can't compare mask and non-mask types. */
8366 if (vectype1 && vectype2
8367 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8368 return false;
8369
8370 /* Boolean values may have another representation in vectors
8371 and therefore we prefer bit operations over comparison for
8372 them (which also works for scalar masks). We store opcodes
8373 to use in bitop1 and bitop2. Statement is vectorized as
8374 BITOP2 (rhs1 BITOP1 rhs2) or
8375 rhs1 BITOP2 (BITOP1 rhs2)
8376 depending on bitop1 and bitop2 arity. */
8377 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8378 {
8379 if (code == GT_EXPR)
8380 {
8381 bitop1 = BIT_NOT_EXPR;
8382 bitop2 = BIT_AND_EXPR;
8383 }
8384 else if (code == GE_EXPR)
8385 {
8386 bitop1 = BIT_NOT_EXPR;
8387 bitop2 = BIT_IOR_EXPR;
8388 }
8389 else if (code == LT_EXPR)
8390 {
8391 bitop1 = BIT_NOT_EXPR;
8392 bitop2 = BIT_AND_EXPR;
8393 std::swap (rhs1, rhs2);
264d951a 8394 std::swap (dts[0], dts[1]);
49e76ff1
IE
8395 }
8396 else if (code == LE_EXPR)
8397 {
8398 bitop1 = BIT_NOT_EXPR;
8399 bitop2 = BIT_IOR_EXPR;
8400 std::swap (rhs1, rhs2);
264d951a 8401 std::swap (dts[0], dts[1]);
49e76ff1
IE
8402 }
8403 else
8404 {
8405 bitop1 = BIT_XOR_EXPR;
8406 if (code == EQ_EXPR)
8407 bitop2 = BIT_NOT_EXPR;
8408 }
8409 }
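 /* E.g. for mask operands a != b is simply a ^ b (no bitop2), while
 a <= b becomes ~a | b after the operands are swapped (bitop1 =
 BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR).  */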
8410
42fd8198
IE
8411 if (!vec_stmt)
8412 {
8413 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
49e76ff1 8414 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
4fc5ebf1 8415 dts, ndts, NULL, NULL);
49e76ff1 8416 if (bitop1 == NOP_EXPR)
96592eed 8417 return expand_vec_cmp_expr_p (vectype, mask_type, code);
49e76ff1
IE
8418 else
8419 {
8420 machine_mode mode = TYPE_MODE (vectype);
8421 optab optab;
8422
8423 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8424 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8425 return false;
8426
8427 if (bitop2 != NOP_EXPR)
8428 {
8429 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8430 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8431 return false;
8432 }
8433 return true;
8434 }
42fd8198
IE
8435 }
8436
8437 /* Transform. */
8438 if (!slp_node)
8439 {
8440 vec_oprnds0.create (1);
8441 vec_oprnds1.create (1);
8442 }
8443
8444 /* Handle def. */
8445 lhs = gimple_assign_lhs (stmt);
8446 mask = vect_create_destination_var (lhs, mask_type);
8447
8448 /* Handle cmp expr. */
8449 for (j = 0; j < ncopies; j++)
8450 {
8451 gassign *new_stmt = NULL;
8452 if (j == 0)
8453 {
8454 if (slp_node)
8455 {
8456 auto_vec<tree, 2> ops;
8457 auto_vec<vec<tree>, 2> vec_defs;
8458
8459 ops.safe_push (rhs1);
8460 ops.safe_push (rhs2);
306b0c92 8461 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
8462 vec_oprnds1 = vec_defs.pop ();
8463 vec_oprnds0 = vec_defs.pop ();
8464 }
8465 else
8466 {
e4af0bc4
IE
8467 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8468 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
8469 }
8470 }
8471 else
8472 {
8473 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8474 vec_oprnds0.pop ());
8475 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8476 vec_oprnds1.pop ());
8477 }
8478
8479 if (!slp_node)
8480 {
8481 vec_oprnds0.quick_push (vec_rhs1);
8482 vec_oprnds1.quick_push (vec_rhs2);
8483 }
8484
8485 /* Arguments are ready. Create the new vector stmt. */
8486 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8487 {
8488 vec_rhs2 = vec_oprnds1[i];
8489
8490 new_temp = make_ssa_name (mask);
49e76ff1
IE
8491 if (bitop1 == NOP_EXPR)
8492 {
8493 new_stmt = gimple_build_assign (new_temp, code,
8494 vec_rhs1, vec_rhs2);
8495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8496 }
8497 else
8498 {
8499 if (bitop1 == BIT_NOT_EXPR)
8500 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8501 else
8502 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8503 vec_rhs2);
8504 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8505 if (bitop2 != NOP_EXPR)
8506 {
8507 tree res = make_ssa_name (mask);
8508 if (bitop2 == BIT_NOT_EXPR)
8509 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8510 else
8511 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8512 new_temp);
8513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8514 }
8515 }
42fd8198
IE
8516 if (slp_node)
8517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8518 }
8519
8520 if (slp_node)
8521 continue;
8522
8523 if (j == 0)
8524 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8525 else
8526 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8527
8528 prev_stmt_info = vinfo_for_stmt (new_stmt);
8529 }
8530
8531 vec_oprnds0.release ();
8532 vec_oprnds1.release ();
8533
8534 return true;
8535}
ebfd146a 8536
68a0f2ff
RS
8537/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8538 can handle all live statements in the node. Otherwise return true
8539 if STMT is not live or if vectorizable_live_operation can handle it.
8540 GSI and VEC_STMT are as for vectorizable_live_operation. */
8541
8542static bool
8543can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8544 slp_tree slp_node, gimple **vec_stmt)
8545{
8546 if (slp_node)
8547 {
8548 gimple *slp_stmt;
8549 unsigned int i;
8550 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8551 {
8552 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8553 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8554 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8555 vec_stmt))
8556 return false;
8557 }
8558 }
8559 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8560 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8561 return false;
8562
8563 return true;
8564}
8565
8644a673 8566/* Make sure the statement is vectorizable. */
ebfd146a
IR
8567
8568bool
891ad31c
RB
8569vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8570 slp_instance node_instance)
ebfd146a 8571{
8644a673 8572 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 8573 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 8574 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 8575 bool ok;
355fe088 8576 gimple *pattern_stmt;
363477c0 8577 gimple_seq pattern_def_seq;
ebfd146a 8578
73fbfcad 8579 if (dump_enabled_p ())
ebfd146a 8580 {
78c60e3d
SS
8581 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8582 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 8583 }
ebfd146a 8584
1825a1f3 8585 if (gimple_has_volatile_ops (stmt))
b8698a0f 8586 {
73fbfcad 8587 if (dump_enabled_p ())
78c60e3d 8588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8589 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
8590
8591 return false;
8592 }
b8698a0f
L
8593
8594 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
8595 to include:
8596 - the COND_EXPR which is the loop exit condition
8597 - any LABEL_EXPRs in the loop
b8698a0f 8598 - computations that are used only for array indexing or loop control.
8644a673 8599 In basic blocks we only analyze statements that are a part of some SLP
83197f37 8600 instance, therefore, all the statements are relevant.
ebfd146a 8601
d092494c 8602 Pattern statement needs to be analyzed instead of the original statement
83197f37 8603 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
8604 statements. In basic blocks we are called from some SLP instance
8605 traversal, so we don't analyze pattern stmts instead; the pattern stmts
8606 will already be part of the SLP instance. */
83197f37
IR
8607
8608 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 8609 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 8610 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 8611 {
9d5e7640 8612 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 8613 && pattern_stmt
9d5e7640
IR
8614 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8615 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8616 {
83197f37 8617 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
8618 stmt = pattern_stmt;
8619 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 8620 if (dump_enabled_p ())
9d5e7640 8621 {
78c60e3d
SS
8622 dump_printf_loc (MSG_NOTE, vect_location,
8623 "==> examining pattern statement: ");
8624 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
8625 }
8626 }
8627 else
8628 {
73fbfcad 8629 if (dump_enabled_p ())
e645e942 8630 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 8631
9d5e7640
IR
8632 return true;
8633 }
8644a673 8634 }
83197f37 8635 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 8636 && node == NULL
83197f37
IR
8637 && pattern_stmt
8638 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8639 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8640 {
8641 /* Analyze PATTERN_STMT too. */
73fbfcad 8642 if (dump_enabled_p ())
83197f37 8643 {
78c60e3d
SS
8644 dump_printf_loc (MSG_NOTE, vect_location,
8645 "==> examining pattern statement: ");
8646 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
8647 }
8648
891ad31c
RB
8649 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8650 node_instance))
83197f37
IR
8651 return false;
8652 }
ebfd146a 8653
1107f3ae 8654 if (is_pattern_stmt_p (stmt_info)
079c527f 8655 && node == NULL
363477c0 8656 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 8657 {
363477c0 8658 gimple_stmt_iterator si;
1107f3ae 8659
363477c0
JJ
8660 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8661 {
355fe088 8662 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
8663 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8664 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8665 {
8666 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8667 if (dump_enabled_p ())
363477c0 8668 {
78c60e3d
SS
8669 dump_printf_loc (MSG_NOTE, vect_location,
8670 "==> examining pattern def statement: ");
8671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8672 }
1107f3ae 8673
363477c0 8674 if (!vect_analyze_stmt (pattern_def_stmt,
891ad31c 8675 need_to_vectorize, node, node_instance))
363477c0
JJ
8676 return false;
8677 }
8678 }
8679 }
1107f3ae 8680
8644a673
IR
8681 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8682 {
8683 case vect_internal_def:
8684 break;
ebfd146a 8685
8644a673 8686 case vect_reduction_def:
7c5222ff 8687 case vect_nested_cycle:
14a61437
RB
8688 gcc_assert (!bb_vinfo
8689 && (relevance == vect_used_in_outer
8690 || relevance == vect_used_in_outer_by_reduction
8691 || relevance == vect_used_by_reduction
b28ead45
AH
8692 || relevance == vect_unused_in_scope
8693 || relevance == vect_used_only_live));
8644a673
IR
8694 break;
8695
8696 case vect_induction_def:
e7baeb39
RB
8697 gcc_assert (!bb_vinfo);
8698 break;
8699
8644a673
IR
8700 case vect_constant_def:
8701 case vect_external_def:
8702 case vect_unknown_def_type:
8703 default:
8704 gcc_unreachable ();
8705 }
ebfd146a 8706
8644a673 8707 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 8708 {
8644a673 8709 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
8710 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8711 || (is_gimple_call (stmt)
8712 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 8713 *need_to_vectorize = true;
ebfd146a
IR
8714 }
8715
b1af7da6
RB
8716 if (PURE_SLP_STMT (stmt_info) && !node)
8717 {
8718 dump_printf_loc (MSG_NOTE, vect_location,
8719 "handled only by SLP analysis\n");
8720 return true;
8721 }
8722
8723 ok = true;
8724 if (!bb_vinfo
8725 && (STMT_VINFO_RELEVANT_P (stmt_info)
8726 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8727 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8728 || vectorizable_conversion (stmt, NULL, NULL, node)
8729 || vectorizable_shift (stmt, NULL, NULL, node)
8730 || vectorizable_operation (stmt, NULL, NULL, node)
8731 || vectorizable_assignment (stmt, NULL, NULL, node)
8732 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8733 || vectorizable_call (stmt, NULL, NULL, node)
8734 || vectorizable_store (stmt, NULL, NULL, node)
891ad31c 8735 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
e7baeb39 8736 || vectorizable_induction (stmt, NULL, NULL, node)
42fd8198
IE
8737 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8738 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
8739 else
8740 {
8741 if (bb_vinfo)
8742 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8743 || vectorizable_conversion (stmt, NULL, NULL, node)
8744 || vectorizable_shift (stmt, NULL, NULL, node)
8745 || vectorizable_operation (stmt, NULL, NULL, node)
8746 || vectorizable_assignment (stmt, NULL, NULL, node)
8747 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8748 || vectorizable_call (stmt, NULL, NULL, node)
8749 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
8750 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8751 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 8752 }
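 /* Whichever vectorizable_* routine succeeds during this analysis phase
 records its choice in STMT_VINFO_TYPE; vect_transform_stmt later
 dispatches on that value when the stmt is actually transformed.  */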
8644a673
IR
8753
8754 if (!ok)
ebfd146a 8755 {
73fbfcad 8756 if (dump_enabled_p ())
8644a673 8757 {
78c60e3d
SS
8758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8759 "not vectorized: relevant stmt not ");
8760 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8761 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8762 }
b8698a0f 8763
ebfd146a
IR
8764 return false;
8765 }
8766
a70d6342
IR
8767 if (bb_vinfo)
8768 return true;
8769
8644a673
IR
8770 /* Stmts that are (also) "live" (i.e. used outside of the loop)
8771 need extra handling, except for vectorizable reductions. */
68a0f2ff
RS
8772 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8773 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
ebfd146a 8774 {
73fbfcad 8775 if (dump_enabled_p ())
8644a673 8776 {
78c60e3d 8777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 8778 "not vectorized: live stmt not supported: ");
78c60e3d 8779 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8780 }
b8698a0f 8781
8644a673 8782 return false;
ebfd146a
IR
8783 }
8784
ebfd146a
IR
8785 return true;
8786}
8787
8788
8789/* Function vect_transform_stmt.
8790
8791 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8792
8793bool
355fe088 8794vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 8795 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
8796 slp_instance slp_node_instance)
8797{
8798 bool is_store = false;
355fe088 8799 gimple *vec_stmt = NULL;
ebfd146a 8800 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 8801 bool done;
ebfd146a 8802
fce57248 8803 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 8804 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 8805
ebfd146a
IR
8806 switch (STMT_VINFO_TYPE (stmt_info))
8807 {
8808 case type_demotion_vec_info_type:
ebfd146a 8809 case type_promotion_vec_info_type:
ebfd146a
IR
8810 case type_conversion_vec_info_type:
8811 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8812 gcc_assert (done);
8813 break;
8814
8815 case induc_vec_info_type:
e7baeb39 8816 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
8817 gcc_assert (done);
8818 break;
8819
9dc3f7de
IR
8820 case shift_vec_info_type:
8821 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8822 gcc_assert (done);
8823 break;
8824
ebfd146a
IR
8825 case op_vec_info_type:
8826 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8827 gcc_assert (done);
8828 break;
8829
8830 case assignment_vec_info_type:
8831 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8832 gcc_assert (done);
8833 break;
8834
8835 case load_vec_info_type:
b8698a0f 8836 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
8837 slp_node_instance);
8838 gcc_assert (done);
8839 break;
8840
8841 case store_vec_info_type:
8842 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8843 gcc_assert (done);
0d0293ac 8844 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
8845 {
8846 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 8847 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
8848 one are skipped, and their vec_stmt_info shouldn't be freed
8849 meanwhile. */
0d0293ac 8850 *grouped_store = true;
ebfd146a
IR
8851 if (STMT_VINFO_VEC_STMT (stmt_info))
8852 is_store = true;
8853 }
8854 else
8855 is_store = true;
8856 break;
8857
8858 case condition_vec_info_type:
f7e531cf 8859 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
8860 gcc_assert (done);
8861 break;
8862
42fd8198
IE
8863 case comparison_vec_info_type:
8864 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8865 gcc_assert (done);
8866 break;
8867
ebfd146a 8868 case call_vec_info_type:
190c2236 8869 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 8870 stmt = gsi_stmt (*gsi);
ebfd146a
IR
8871 break;
8872
0136f8f0
AH
8873 case call_simd_clone_vec_info_type:
8874 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8875 stmt = gsi_stmt (*gsi);
8876 break;
8877
ebfd146a 8878 case reduc_vec_info_type:
891ad31c
RB
8879 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8880 slp_node_instance);
ebfd146a
IR
8881 gcc_assert (done);
8882 break;
8883
8884 default:
8885 if (!STMT_VINFO_LIVE_P (stmt_info))
8886 {
73fbfcad 8887 if (dump_enabled_p ())
78c60e3d 8888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8889 "stmt not supported.\n");
ebfd146a
IR
8890 gcc_unreachable ();
8891 }
8892 }
8893
225ce44b
RB
8894 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8895 This would break hybrid SLP vectorization. */
8896 if (slp_node)
d90f8440
RB
8897 gcc_assert (!vec_stmt
8898 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 8899
ebfd146a
IR
8900 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8901 is being vectorized, but outside the immediately enclosing loop. */
8902 if (vec_stmt
a70d6342
IR
8903 && STMT_VINFO_LOOP_VINFO (stmt_info)
8904 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8905 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
8906 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8907 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 8908 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 8909 vect_used_in_outer_by_reduction))
ebfd146a 8910 {
a70d6342
IR
8911 struct loop *innerloop = LOOP_VINFO_LOOP (
8912 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
8913 imm_use_iterator imm_iter;
8914 use_operand_p use_p;
8915 tree scalar_dest;
355fe088 8916 gimple *exit_phi;
ebfd146a 8917
73fbfcad 8918 if (dump_enabled_p ())
78c60e3d 8919 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 8920 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
8921
8922 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8923 (to be used when vectorizing outer-loop stmts that use the DEF of
8924 STMT). */
8925 if (gimple_code (stmt) == GIMPLE_PHI)
8926 scalar_dest = PHI_RESULT (stmt);
8927 else
8928 scalar_dest = gimple_assign_lhs (stmt);
8929
8930 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8931 {
8932 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8933 {
8934 exit_phi = USE_STMT (use_p);
8935 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8936 }
8937 }
8938 }
8939
8940 /* Handle stmts whose DEF is used outside the loop-nest that is
8941 being vectorized. */
68a0f2ff 8942 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 8943 {
68a0f2ff 8944 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
ebfd146a
IR
8945 gcc_assert (done);
8946 }
8947
8948 if (vec_stmt)
83197f37 8949 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 8950
b8698a0f 8951 return is_store;
ebfd146a
IR
8952}
8953
8954
b8698a0f 8955/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
8956 stmt_vec_info. */
8957
8958void
355fe088 8959vect_remove_stores (gimple *first_stmt)
ebfd146a 8960{
355fe088
TS
8961 gimple *next = first_stmt;
8962 gimple *tmp;
ebfd146a
IR
8963 gimple_stmt_iterator next_si;
8964
8965 while (next)
8966 {
78048b1c
JJ
8967 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8968
8969 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8970 if (is_pattern_stmt_p (stmt_info))
8971 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
8972 /* Free the attached stmt_vec_info and remove the stmt. */
8973 next_si = gsi_for_stmt (next);
3d3f2249 8974 unlink_stmt_vdef (next);
ebfd146a 8975 gsi_remove (&next_si, true);
3d3f2249 8976 release_defs (next);
ebfd146a
IR
8977 free_stmt_vec_info (next);
8978 next = tmp;
8979 }
8980}
8981
8982
8983/* Function new_stmt_vec_info.
8984
8985 Create and initialize a new stmt_vec_info struct for STMT. */
8986
8987stmt_vec_info
310213d4 8988new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
8989{
8990 stmt_vec_info res;
8991 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8992
8993 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8994 STMT_VINFO_STMT (res) = stmt;
310213d4 8995 res->vinfo = vinfo;
8644a673 8996 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
8997 STMT_VINFO_LIVE_P (res) = false;
8998 STMT_VINFO_VECTYPE (res) = NULL;
8999 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9000 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9001 STMT_VINFO_IN_PATTERN_P (res) = false;
9002 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9003 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9004 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9005 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9006 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9007
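 /* Loop-header PHIs may turn out to be inductions, reductions or nested
 cycles; their def type is filled in later by the analysis, so start
 out as unknown.  */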
ebfd146a
IR
9008 if (gimple_code (stmt) == GIMPLE_PHI
9009 && is_loop_header_bb_p (gimple_bb (stmt)))
9010 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9011 else
8644a673
IR
9012 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9013
9771b263 9014 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9015 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9016 STMT_VINFO_NUM_SLP_USES (res) = 0;
9017
e14c1050
IR
9018 GROUP_FIRST_ELEMENT (res) = NULL;
9019 GROUP_NEXT_ELEMENT (res) = NULL;
9020 GROUP_SIZE (res) = 0;
9021 GROUP_STORE_COUNT (res) = 0;
9022 GROUP_GAP (res) = 0;
9023 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
9024
9025 return res;
9026}
9027
9028
9029/* Create a hash table for stmt_vec_info. */
9030
9031void
9032init_stmt_vec_info_vec (void)
9033{
9771b263
DN
9034 gcc_assert (!stmt_vec_info_vec.exists ());
9035 stmt_vec_info_vec.create (50);
ebfd146a
IR
9036}
9037
9038
9039/* Free hash table for stmt_vec_info. */
9040
9041void
9042free_stmt_vec_info_vec (void)
9043{
93675444 9044 unsigned int i;
3161455c 9045 stmt_vec_info info;
93675444
JJ
9046 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9047 if (info != NULL)
3161455c 9048 free_stmt_vec_info (STMT_VINFO_STMT (info));
9771b263
DN
9049 gcc_assert (stmt_vec_info_vec.exists ());
9050 stmt_vec_info_vec.release ();
ebfd146a
IR
9051}
9052
9053
9054/* Free stmt vectorization related info. */
9055
9056void
355fe088 9057free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9058{
9059 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9060
9061 if (!stmt_info)
9062 return;
9063
78048b1c
JJ
9064 /* Check if this statement has a related "pattern stmt"
9065 (introduced by the vectorizer during the pattern recognition
9066 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9067 too. */
9068 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9069 {
9070 stmt_vec_info patt_info
9071 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9072 if (patt_info)
9073 {
363477c0 9074 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 9075 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9076 gimple_set_bb (patt_stmt, NULL);
9077 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9078 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9079 release_ssa_name (lhs);
363477c0
JJ
9080 if (seq)
9081 {
9082 gimple_stmt_iterator si;
9083 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 9084 {
355fe088 9085 gimple *seq_stmt = gsi_stmt (si);
f0281fde 9086 gimple_set_bb (seq_stmt, NULL);
7532abf2 9087 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 9088 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
9089 release_ssa_name (lhs);
9090 free_stmt_vec_info (seq_stmt);
9091 }
363477c0 9092 }
f0281fde 9093 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9094 }
9095 }
9096
9771b263 9097 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9098 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9099 set_vinfo_for_stmt (stmt, NULL);
9100 free (stmt_info);
9101}
9102
9103
bb67d9c7 9104/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9105
bb67d9c7 9106 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9107 by the target. */
9108
c803b2a9 9109tree
86e36728 9110get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9111{
c7d97b28 9112 tree orig_scalar_type = scalar_type;
3bd8f481 9113 scalar_mode inner_mode;
ef4bddc2 9114 machine_mode simd_mode;
86e36728 9115 poly_uint64 nunits;
ebfd146a
IR
9116 tree vectype;
9117
3bd8f481
RS
9118 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9119 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9120 return NULL_TREE;
9121
3bd8f481 9122 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9123
7b7b1813
RG
9124 /* For vector types of elements whose mode precision doesn't
9125 match their type's precision we use an element type of mode
9126 precision. The vectorization routines will have to make sure
48f2e373
RB
9127 they support the proper result truncation/extension.
9128 We also make sure to build vector types with INTEGER_TYPE
9129 component type only. */
6d7971b8 9130 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9131 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9132 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9133 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9134 TYPE_UNSIGNED (scalar_type));
6d7971b8 9135
ccbf5bb4
RG
9136 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9137 When the component mode passes the above test simply use a type
9138 corresponding to that mode. The theory is that any use that
9139 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9140 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9141 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9142 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9143
9144 /* We can't build a vector type of elements with alignment bigger than
9145 their size. */
dfc2e2ac 9146 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9147 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9148 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9149
dfc2e2ac
RB
9150 /* If we fell back to using the mode, fail if there was
9151 no scalar type for it. */
9152 if (scalar_type == NULL_TREE)
9153 return NULL_TREE;
9154
bb67d9c7
RG
9155 /* If no size was supplied use the mode the target prefers. Otherwise
9156 lookup a vector mode of the specified size. */
86e36728 9157 if (known_eq (size, 0U))
bb67d9c7 9158 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9159 else if (!multiple_p (size, nbytes, &nunits)
9160 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9161 return NULL_TREE;
4c8fd8ac 9162 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9163 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9164 return NULL_TREE;
ebfd146a
IR
9165
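 /* For example, a 16-byte SIZE with a 4-byte integer element gives
 nunits == 4 and a V4SI-style vector type, provided the target
 supports such a mode.  */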
9166 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9167
9168 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9169 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9170 return NULL_TREE;
ebfd146a 9171
c7d97b28
RB
9172 /* Re-attach the address-space qualifier if we canonicalized the scalar
9173 type. */
9174 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9175 return build_qualified_type
9176 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9177
ebfd146a
IR
9178 return vectype;
9179}
9180
86e36728 9181poly_uint64 current_vector_size;
bb67d9c7
RG
9182
9183/* Function get_vectype_for_scalar_type.
9184
9185 Returns the vector type corresponding to SCALAR_TYPE as supported
9186 by the target. */
9187
9188tree
9189get_vectype_for_scalar_type (tree scalar_type)
9190{
9191 tree vectype;
9192 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9193 current_vector_size);
9194 if (vectype
86e36728 9195 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
9196 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9197 return vectype;
9198}
9199
42fd8198
IE
9200/* Function get_mask_type_for_scalar_type.
9201
9202 Returns the mask type corresponding to a result of comparison
9203 of vectors of specified SCALAR_TYPE as supported by target. */
9204
9205tree
9206get_mask_type_for_scalar_type (tree scalar_type)
9207{
9208 tree vectype = get_vectype_for_scalar_type (scalar_type);
9209
9210 if (!vectype)
9211 return NULL;
9212
9213 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9214 current_vector_size);
9215}
9216
b690cc0f
RG
9217/* Function get_same_sized_vectype
9218
9219 Returns a vector type corresponding to SCALAR_TYPE of size
9220 VECTOR_TYPE if supported by the target. */
9221
9222tree
bb67d9c7 9223get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 9224{
2568d8a1 9225 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
9226 return build_same_sized_truth_vector_type (vector_type);
9227
bb67d9c7
RG
9228 return get_vectype_for_scalar_type_and_size
9229 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
9230}
9231
ebfd146a
IR
9232/* Function vect_is_simple_use.
9233
9234 Input:
81c40241
RB
9235 VINFO - the vect info of the loop or basic block that is being vectorized.
9236 OPERAND - operand in the loop or bb.
9237 Output:
9238 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9239 DT - the type of definition
ebfd146a
IR
9240
9241 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 9242 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 9243 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 9244 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
9245 is the case in reduction/induction computations).
9246 For basic blocks, supportable operands are constants and bb invariants.
9247 For now, operands defined outside the basic block are not supported. */
ebfd146a
IR
9248
9249bool
81c40241
RB
9250vect_is_simple_use (tree operand, vec_info *vinfo,
9251 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 9252{
ebfd146a 9253 *def_stmt = NULL;
3fc356dc 9254 *dt = vect_unknown_def_type;
b8698a0f 9255
73fbfcad 9256 if (dump_enabled_p ())
ebfd146a 9257 {
78c60e3d
SS
9258 dump_printf_loc (MSG_NOTE, vect_location,
9259 "vect_is_simple_use: operand ");
9260 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 9261 dump_printf (MSG_NOTE, "\n");
ebfd146a 9262 }
b8698a0f 9263
b758f602 9264 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
9265 {
9266 *dt = vect_constant_def;
9267 return true;
9268 }
b8698a0f 9269
ebfd146a
IR
9270 if (is_gimple_min_invariant (operand))
9271 {
8644a673 9272 *dt = vect_external_def;
ebfd146a
IR
9273 return true;
9274 }
9275
ebfd146a
IR
9276 if (TREE_CODE (operand) != SSA_NAME)
9277 {
73fbfcad 9278 if (dump_enabled_p ())
af29617a
AH
9279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9280 "not ssa-name.\n");
ebfd146a
IR
9281 return false;
9282 }
b8698a0f 9283
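 /* Default definitions (e.g. incoming function parameters) have no
 defining statement inside the region, so treat them as external.  */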
3fc356dc 9284 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 9285 {
3fc356dc
RB
9286 *dt = vect_external_def;
9287 return true;
ebfd146a
IR
9288 }
9289
3fc356dc 9290 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 9291 if (dump_enabled_p ())
ebfd146a 9292 {
78c60e3d
SS
9293 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9294 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
9295 }
9296
61d371eb 9297 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 9298 *dt = vect_external_def;
ebfd146a
IR
9299 else
9300 {
3fc356dc 9301 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 9302 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
9303 }
9304
2e8ab70c
RB
9305 if (dump_enabled_p ())
9306 {
9307 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9308 switch (*dt)
9309 {
9310 case vect_uninitialized_def:
9311 dump_printf (MSG_NOTE, "uninitialized\n");
9312 break;
9313 case vect_constant_def:
9314 dump_printf (MSG_NOTE, "constant\n");
9315 break;
9316 case vect_external_def:
9317 dump_printf (MSG_NOTE, "external\n");
9318 break;
9319 case vect_internal_def:
9320 dump_printf (MSG_NOTE, "internal\n");
9321 break;
9322 case vect_induction_def:
9323 dump_printf (MSG_NOTE, "induction\n");
9324 break;
9325 case vect_reduction_def:
9326 dump_printf (MSG_NOTE, "reduction\n");
9327 break;
9328 case vect_double_reduction_def:
9329 dump_printf (MSG_NOTE, "double reduction\n");
9330 break;
9331 case vect_nested_cycle:
9332 dump_printf (MSG_NOTE, "nested cycle\n");
9333 break;
9334 case vect_unknown_def_type:
9335 dump_printf (MSG_NOTE, "unknown\n");
9336 break;
9337 }
9338 }
9339
81c40241 9340 if (*dt == vect_unknown_def_type)
ebfd146a 9341 {
73fbfcad 9342 if (dump_enabled_p ())
78c60e3d 9343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9344 "Unsupported pattern.\n");
ebfd146a
IR
9345 return false;
9346 }
9347
ebfd146a
IR
9348 switch (gimple_code (*def_stmt))
9349 {
9350 case GIMPLE_PHI:
ebfd146a 9351 case GIMPLE_ASSIGN:
ebfd146a 9352 case GIMPLE_CALL:
81c40241 9353 break;
ebfd146a 9354 default:
73fbfcad 9355 if (dump_enabled_p ())
78c60e3d 9356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9357 "unsupported defining stmt:\n");
ebfd146a
IR
9358 return false;
9359 }
9360
9361 return true;
9362}
9363
81c40241 9364/* Function vect_is_simple_use.
b690cc0f 9365
81c40241 9366 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
9367 type of OPERAND and stores it to *VECTYPE. If the definition of
9368 OPERAND is vect_uninitialized_def, vect_constant_def or
9369 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9370 is responsible to compute the best suited vector type for the
9371 scalar operand. */
9372
9373bool
81c40241
RB
9374vect_is_simple_use (tree operand, vec_info *vinfo,
9375 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
b690cc0f 9376{
81c40241 9377 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
b690cc0f
RG
9378 return false;
9379
9380 /* Now get a vector type if the def is internal, otherwise supply
9381 NULL_TREE and leave it up to the caller to figure out a proper
9382 type for the use stmt. */
9383 if (*dt == vect_internal_def
9384 || *dt == vect_induction_def
9385 || *dt == vect_reduction_def
9386 || *dt == vect_double_reduction_def
9387 || *dt == vect_nested_cycle)
9388 {
9389 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
9390
9391 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9392 && !STMT_VINFO_RELEVANT (stmt_info)
9393 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 9394 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 9395
b690cc0f
RG
9396 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9397 gcc_assert (*vectype != NULL_TREE);
9398 }
9399 else if (*dt == vect_uninitialized_def
9400 || *dt == vect_constant_def
9401 || *dt == vect_external_def)
9402 *vectype = NULL_TREE;
9403 else
9404 gcc_unreachable ();
9405
9406 return true;
9407}
9408
ebfd146a
IR
9409
9410/* Function supportable_widening_operation
9411
b8698a0f
L
9412 Check whether an operation represented by the code CODE is a
9413 widening operation that is supported by the target platform in
b690cc0f
RG
9414 vector form (i.e., when operating on arguments of type VECTYPE_IN
9415 producing a result of type VECTYPE_OUT).
b8698a0f 9416
ebfd146a
IR
9417 Widening operations we currently support are NOP (CONVERT), FLOAT
9418 and WIDEN_MULT. This function checks if these operations are supported
9419 by the target platform either directly (via vector tree-codes), or via
9420 target builtins.
9421
9422 Output:
b8698a0f
L
9423 - CODE1 and CODE2 are codes of vector operations to be used when
9424 vectorizing the operation, if available.
ebfd146a
IR
9425 - MULTI_STEP_CVT determines the number of required intermediate steps in
9426 case of multi-step conversion (like char->short->int - in that case
9427 MULTI_STEP_CVT will be 1).
b8698a0f
L
9428 - INTERM_TYPES contains the intermediate type required to perform the
9429 widening operation (short in the above example). */
ebfd146a
IR
9430
9431bool
355fe088 9432supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 9433 tree vectype_out, tree vectype_in,
ebfd146a
IR
9434 enum tree_code *code1, enum tree_code *code2,
9435 int *multi_step_cvt,
9771b263 9436 vec<tree> *interm_types)
ebfd146a
IR
9437{
9438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9439 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 9440 struct loop *vect_loop = NULL;
ef4bddc2 9441 machine_mode vec_mode;
81f40b79 9442 enum insn_code icode1, icode2;
ebfd146a 9443 optab optab1, optab2;
b690cc0f
RG
9444 tree vectype = vectype_in;
9445 tree wide_vectype = vectype_out;
ebfd146a 9446 enum tree_code c1, c2;
4a00c761
JJ
9447 int i;
9448 tree prev_type, intermediate_type;
ef4bddc2 9449 machine_mode intermediate_mode, prev_mode;
4a00c761 9450 optab optab3, optab4;
ebfd146a 9451
4a00c761 9452 *multi_step_cvt = 0;
4ef69dfc
IR
9453 if (loop_info)
9454 vect_loop = LOOP_VINFO_LOOP (loop_info);
9455
ebfd146a
IR
9456 switch (code)
9457 {
9458 case WIDEN_MULT_EXPR:
6ae6116f
RH
9459 /* The result of a vectorized widening operation usually requires
9460 two vectors (because the widened results do not fit into one vector).
9461 The generated vector results would normally be expected to be
9462 generated in the same order as in the original scalar computation,
9463 i.e. if 8 results are generated in each vector iteration, they are
9464 to be organized as follows:
9465 vect1: [res1,res2,res3,res4],
9466 vect2: [res5,res6,res7,res8].
9467
9468 However, in the special case that the result of the widening
9469 operation is used in a reduction computation only, the order doesn't
9470 matter (because when vectorizing a reduction we change the order of
9471 the computation). Some targets can take advantage of this and
9472 generate more efficient code. For example, targets like Altivec,
9473 that support widen_mult using a sequence of {mult_even,mult_odd}
9474 generate the following vectors:
9475 vect1: [res1,res3,res5,res7],
9476 vect2: [res2,res4,res6,res8].
9477
9478 When vectorizing outer-loops, we execute the inner-loop sequentially
9479 (each vectorized inner-loop iteration contributes to VF outer-loop
9480 iterations in parallel). We therefore don't allow changing the
9481 order of the computation in the inner-loop during outer-loop
9482 vectorization. */
9483 /* TODO: Another case in which order doesn't *really* matter is when we
9484 widen and then contract again, e.g. (short)((int)x * y >> 8).
9485 Normally, pack_trunc performs an even/odd permute, whereas the
9486 repack from an even/odd expansion would be an interleave, which
9487 would be significantly simpler for e.g. AVX2. */
9488 /* In any case, in order to avoid duplicating the code below, recurse
9489 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9490 are properly set up for the caller. If we fail, we'll continue with
9491 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9492 if (vect_loop
9493 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9494 && !nested_in_vect_loop_p (vect_loop, stmt)
9495 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9496 stmt, vectype_out, vectype_in,
a86ec597
RH
9497 code1, code2, multi_step_cvt,
9498 interm_types))
ebc047a2
CH
9499 {
9500 /* Elements in a vector with vect_used_by_reduction property cannot
9501 be reordered if the use chain with this property does not have the
9502 same operation. One such example is s += a * b, where elements
9503 in a and b cannot be reordered. Here we check if the vector defined
9504 by STMT is only directly used in the reduction statement. */
9505 tree lhs = gimple_assign_lhs (stmt);
9506 use_operand_p dummy;
355fe088 9507 gimple *use_stmt;
ebc047a2
CH
9508 stmt_vec_info use_stmt_info = NULL;
9509 if (single_imm_use (lhs, &dummy, &use_stmt)
9510 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9511 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9512 return true;
9513 }
4a00c761
JJ
9514 c1 = VEC_WIDEN_MULT_LO_EXPR;
9515 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
9516 break;
9517
81c40241
RB
9518 case DOT_PROD_EXPR:
9519 c1 = DOT_PROD_EXPR;
9520 c2 = DOT_PROD_EXPR;
9521 break;
9522
9523 case SAD_EXPR:
9524 c1 = SAD_EXPR;
9525 c2 = SAD_EXPR;
9526 break;
9527
6ae6116f
RH
9528 case VEC_WIDEN_MULT_EVEN_EXPR:
9529 /* Support the recursion induced just above. */
9530 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9531 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9532 break;
9533
36ba4aae 9534 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
9535 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9536 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
9537 break;
9538
ebfd146a 9539 CASE_CONVERT:
4a00c761
JJ
9540 c1 = VEC_UNPACK_LO_EXPR;
9541 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
9542 break;
9543
9544 case FLOAT_EXPR:
4a00c761
JJ
9545 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9546 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
9547 break;
9548
9549 case FIX_TRUNC_EXPR:
9550 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9551 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9552 computing the operation. */
9553 return false;
9554
9555 default:
9556 gcc_unreachable ();
9557 }
9558
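   /* Roughly: the _LO/_HI codes name halves of the input vector, and which
      half holds the first scalar iterations depends on the target's
      endianness, hence the swap below on big-endian targets.  The even/odd
      decomposition selects alternating elements and is endian-neutral, so
      VEC_WIDEN_MULT_EVEN_EXPR is exempt from the swap.  */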
6ae6116f 9559 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 9560 std::swap (c1, c2);
4a00c761 9561
ebfd146a
IR
9562 if (code == FIX_TRUNC_EXPR)
9563 {
9564 /* The signedness is determined from output operand. */
b690cc0f
RG
9565 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9566 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
9567 }
9568 else
9569 {
9570 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9571 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9572 }
9573
9574 if (!optab1 || !optab2)
9575 return false;
9576
9577 vec_mode = TYPE_MODE (vectype);
947131ba
RS
9578 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9579 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
9580 return false;
9581
4a00c761
JJ
9582 *code1 = c1;
9583 *code2 = c2;
9584
9585 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9586 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
9587 /* For scalar masks we may have different boolean
9588 vector types having the same QImode. Thus we
9589 add an additional check on the number of elements. */
9590 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9591 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
9592 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 9593
b8698a0f 9594 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 9595 types. */
ebfd146a 9596
4a00c761
JJ
9597 prev_type = vectype;
9598 prev_mode = vec_mode;
b8698a0f 9599
4a00c761
JJ
9600 if (!CONVERT_EXPR_CODE_P (code))
9601 return false;
b8698a0f 9602
4a00c761
JJ
9603 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9604 intermediate steps in the promotion sequence. We try up to
9605 MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
9606 not. */
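   /* For example (an illustrative case assuming the usual integer vector
      types): widening a vector of chars to a vector of ints cannot be done
      in a single step, so the loop below would record the corresponding
      vector of shorts as an intermediate type, i.e. char -> short -> int
      with *MULTI_STEP_CVT == 1.  */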
9771b263 9607 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
9608 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9609 {
9610 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9611 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9612 {
928686b1
RS
9613 poly_uint64 intermediate_nelts
9614 = exact_div (TYPE_VECTOR_SUBPARTS (prev_type), 2);
3ae0661a 9615 intermediate_type
928686b1 9616 = build_truth_vector_type (intermediate_nelts,
3ae0661a
IE
9617 current_vector_size);
9618 if (intermediate_mode != TYPE_MODE (intermediate_type))
9619 return false;
9620 }
9621 else
9622 intermediate_type
9623 = lang_hooks.types.type_for_mode (intermediate_mode,
9624 TYPE_UNSIGNED (prev_type));
9625
4a00c761
JJ
9626 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9627 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9628
9629 if (!optab3 || !optab4
9630 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9631 || insn_data[icode1].operand[0].mode != intermediate_mode
9632 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9633 || insn_data[icode2].operand[0].mode != intermediate_mode
9634 || ((icode1 = optab_handler (optab3, intermediate_mode))
9635 == CODE_FOR_nothing)
9636 || ((icode2 = optab_handler (optab4, intermediate_mode))
9637 == CODE_FOR_nothing))
9638 break;
ebfd146a 9639
9771b263 9640 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9641 (*multi_step_cvt)++;
9642
9643 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9644 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 9645 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9646 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
9647 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
9648
9649 prev_type = intermediate_type;
9650 prev_mode = intermediate_mode;
ebfd146a
IR
9651 }
9652
9771b263 9653 interm_types->release ();
4a00c761 9654 return false;
ebfd146a
IR
9655}
9656
9657
9658/* Function supportable_narrowing_operation
9659
b8698a0f
L
9660 Check whether an operation represented by the code CODE is a
9661 narrowing operation that is supported by the target platform in
b690cc0f
RG
9662 vector form (i.e., when operating on arguments of type VECTYPE_IN
9663 and producing a result of type VECTYPE_OUT).
b8698a0f 9664
ebfd146a 9665 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 9666 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
9667 the target platform directly via vector tree-codes.
9668
9669 Output:
b8698a0f
L
9670 - CODE1 is the code of a vector operation to be used when
9671 vectorizing the operation, if available.
ebfd146a
IR
9672 - MULTI_STEP_CVT determines the number of required intermediate steps in
9673 case of multi-step conversion (like int->short->char - in that case
9674 MULTI_STEP_CVT will be 1).
9675 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 9676 narrowing operation (short in the above example). */
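/* A concrete instance (illustrative, assuming common vector modes):
   narrowing V4SI inputs to a V16QI result goes through V8HI, so *CODE1
   would be VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT would be 1, and
   INTERM_TYPES would contain the V8HI vector type.  */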
ebfd146a
IR
9677
9678bool
9679supportable_narrowing_operation (enum tree_code code,
b690cc0f 9680 tree vectype_out, tree vectype_in,
ebfd146a 9681 enum tree_code *code1, int *multi_step_cvt,
9771b263 9682 vec<tree> *interm_types)
ebfd146a 9683{
ef4bddc2 9684 machine_mode vec_mode;
ebfd146a
IR
9685 enum insn_code icode1;
9686 optab optab1, interm_optab;
b690cc0f
RG
9687 tree vectype = vectype_in;
9688 tree narrow_vectype = vectype_out;
ebfd146a 9689 enum tree_code c1;
3ae0661a 9690 tree intermediate_type, prev_type;
ef4bddc2 9691 machine_mode intermediate_mode, prev_mode;
ebfd146a 9692 int i;
4a00c761 9693 bool uns;
ebfd146a 9694
4a00c761 9695 *multi_step_cvt = 0;
ebfd146a
IR
9696 switch (code)
9697 {
9698 CASE_CONVERT:
9699 c1 = VEC_PACK_TRUNC_EXPR;
9700 break;
9701
9702 case FIX_TRUNC_EXPR:
9703 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9704 break;
9705
9706 case FLOAT_EXPR:
9707 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9708 tree code and optabs used for computing the operation. */
9709 return false;
9710
9711 default:
9712 gcc_unreachable ();
9713 }
9714
9715 if (code == FIX_TRUNC_EXPR)
9716 /* The signedness is determined from output operand. */
b690cc0f 9717 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
9718 else
9719 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9720
9721 if (!optab1)
9722 return false;
9723
9724 vec_mode = TYPE_MODE (vectype);
947131ba 9725 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
9726 return false;
9727
4a00c761
JJ
9728 *code1 = c1;
9729
9730 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
9731 /* For scalar masks we may have different boolean
9732 vector types having the same QImode. Thus we
9733 add an additional check on the number of elements. */
9734 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9735 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
9736 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 9737
ebfd146a
IR
9738 /* Check if it's a multi-step conversion that can be done using intermediate
9739 types. */
4a00c761 9740 prev_mode = vec_mode;
3ae0661a 9741 prev_type = vectype;
4a00c761
JJ
9742 if (code == FIX_TRUNC_EXPR)
9743 uns = TYPE_UNSIGNED (vectype_out);
9744 else
9745 uns = TYPE_UNSIGNED (vectype);
9746
9747 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
9748 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
9749 more costly than signed. */
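   /* Roughly speaking, the block below checks whether the pack operation is
      also available on a signed integer type of VECTYPE_OUT's mode and
      produces the same vector mode; if so, the rest of the multi-step
      sequence is built with signed intermediate types.  */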
9750 if (code == FIX_TRUNC_EXPR && uns)
9751 {
9752 enum insn_code icode2;
9753
9754 intermediate_type
9755 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9756 interm_optab
9757 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 9758 if (interm_optab != unknown_optab
4a00c761
JJ
9759 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9760 && insn_data[icode1].operand[0].mode
9761 == insn_data[icode2].operand[0].mode)
9762 {
9763 uns = false;
9764 optab1 = interm_optab;
9765 icode1 = icode2;
9766 }
9767 }
ebfd146a 9768
4a00c761
JJ
9769 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9770 intermediate steps in the narrowing sequence. We try up to
9771 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 9772 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
9773 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9774 {
9775 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9776 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9777 {
9778 intermediate_type
9779 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9780 current_vector_size);
9781 if (intermediate_mode != TYPE_MODE (intermediate_type))
9782 return false;
9783 }
9784 else
9785 intermediate_type
9786 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
9787 interm_optab
9788 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9789 optab_default);
9790 if (!interm_optab
9791 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9792 || insn_data[icode1].operand[0].mode != intermediate_mode
9793 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9794 == CODE_FOR_nothing))
9795 break;
9796
9771b263 9797 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9798 (*multi_step_cvt)++;
9799
9800 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 9801 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
9802 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
9803 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
9804
9805 prev_mode = intermediate_mode;
3ae0661a 9806 prev_type = intermediate_type;
4a00c761 9807 optab1 = interm_optab;
ebfd146a
IR
9808 }
9809
9771b263 9810 interm_types->release ();
4a00c761 9811 return false;
ebfd146a 9812}