]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-stmts.c
Add support for vectorising live-out values using SVE LASTB
[thirdparty/gcc.git] / gcc / tree-vect-stmts.c
CommitLineData
ebfd146a 1/* Statement Analysis and Transformation for Vectorization
85ec4feb 2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
c7131fb2 30#include "ssa.h"
957060b5
AM
31#include "optabs-tree.h"
32#include "insn-config.h"
33#include "recog.h" /* FIXME: for insn_data */
34#include "cgraph.h"
957060b5 35#include "dumpfile.h"
c7131fb2 36#include "alias.h"
40e23961 37#include "fold-const.h"
d8a2d370 38#include "stor-layout.h"
2fb9a547 39#include "tree-eh.h"
45b0be94 40#include "gimplify.h"
5be5c238 41#include "gimple-iterator.h"
18f429e2 42#include "gimplify-me.h"
442b4905 43#include "tree-cfg.h"
e28030cf 44#include "tree-ssa-loop-manip.h"
ebfd146a 45#include "cfgloop.h"
0136f8f0
AH
46#include "tree-ssa-loop.h"
47#include "tree-scalar-evolution.h"
ebfd146a 48#include "tree-vectorizer.h"
9b2b7279 49#include "builtins.h"
70439f0d 50#include "internal-fn.h"
5ebaa477 51#include "tree-vector-builder.h"
f151c9e1 52#include "vec-perm-indices.h"
7cfb4d93
RS
53#include "tree-ssa-loop-niter.h"
54#include "gimple-fold.h"
ebfd146a 55
7ee2468b
SB
56/* For lang_hooks.types.type_for_mode. */
57#include "langhooks.h"
ebfd146a 58
c3e7ee41
BS
59/* Return the vectorized type for the given statement. */
60
61tree
62stmt_vectype (struct _stmt_vec_info *stmt_info)
63{
64 return STMT_VINFO_VECTYPE (stmt_info);
65}
66
67/* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69bool
70stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71{
355fe088 72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
c3e7ee41
BS
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
76
77 if (!loop_vinfo)
78 return false;
79
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
81
82 return (bb->loop_father == loop->inner);
83}
84
85/* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
88
89unsigned
92345349 90record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
c3e7ee41 91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92345349 92 int misalign, enum vect_cost_model_location where)
c3e7ee41 93{
cc9fe6bb
JH
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
92345349 100 if (body_cost_vec)
c3e7ee41 101 {
92345349 102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
ddf56386
RB
103 stmt_info_for_cost si = { count, kind,
104 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 misalign };
106 body_cost_vec->safe_push (si);
c3e7ee41 107 return (unsigned)
92345349 108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
c3e7ee41
BS
109 }
110 else
310213d4
RB
111 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 count, kind, stmt_info, misalign, where);
c3e7ee41
BS
113}
114
272c6793
RS
115/* Return a variable of type ELEM_TYPE[NELEMS]. */
116
117static tree
118create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
119{
120 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 "vect_array");
122}
123
124/* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
128
129static tree
355fe088 130read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
272c6793
RS
131 tree array, unsigned HOST_WIDE_INT n)
132{
133 tree vect_type, vect, vect_name, array_ref;
355fe088 134 gimple *new_stmt;
272c6793
RS
135
136 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137 vect_type = TREE_TYPE (TREE_TYPE (array));
138 vect = vect_create_destination_var (scalar_dest, vect_type);
139 array_ref = build4 (ARRAY_REF, vect_type, array,
140 build_int_cst (size_type_node, n),
141 NULL_TREE, NULL_TREE);
142
143 new_stmt = gimple_build_assign (vect, array_ref);
144 vect_name = make_ssa_name (vect, new_stmt);
145 gimple_assign_set_lhs (new_stmt, vect_name);
146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
147
148 return vect_name;
149}
150
151/* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
154
155static void
355fe088 156write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
272c6793
RS
157 tree array, unsigned HOST_WIDE_INT n)
158{
159 tree array_ref;
355fe088 160 gimple *new_stmt;
272c6793
RS
161
162 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
165
166 new_stmt = gimple_build_assign (array_ref, vect);
167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
168}
169
170/* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
172 (and its group). */
173
174static tree
44fc7854 175create_array_ref (tree type, tree ptr, tree alias_ptr_type)
272c6793 176{
44fc7854 177 tree mem_ref;
272c6793 178
272c6793
RS
179 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180 /* Arrays have the same alignment as their type. */
644ffefd 181 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
272c6793
RS
182 return mem_ref;
183}
184
ebfd146a
IR
185/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
186
187/* Function vect_mark_relevant.
188
189 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
190
191static void
355fe088 192vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
97ecdb46 193 enum vect_relevant relevant, bool live_p)
ebfd146a
IR
194{
195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
355fe088 198 gimple *pattern_stmt;
ebfd146a 199
73fbfcad 200 if (dump_enabled_p ())
66c16fd9
RB
201 {
202 dump_printf_loc (MSG_NOTE, vect_location,
203 "mark relevant %d, live %d: ", relevant, live_p);
204 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 }
ebfd146a 206
83197f37
IR
207 /* If this stmt is an original stmt in a pattern, we might need to mark its
208 related pattern stmt instead of the original stmt. However, such stmts
209 may have their own uses that are not in any pattern, in such cases the
210 stmt itself should be marked. */
ebfd146a
IR
211 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
212 {
97ecdb46
JJ
213 /* This is the last stmt in a sequence that was detected as a
214 pattern that can potentially be vectorized. Don't mark the stmt
215 as relevant/live because it's not going to be vectorized.
216 Instead mark the pattern-stmt that replaces it. */
83197f37 217
97ecdb46
JJ
218 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
219
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_info = vinfo_for_stmt (pattern_stmt);
225 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228 stmt = pattern_stmt;
ebfd146a
IR
229 }
230
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
234
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237 {
73fbfcad 238 if (dump_enabled_p ())
78c60e3d 239 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 240 "already marked relevant/live.\n");
ebfd146a
IR
241 return;
242 }
243
9771b263 244 worklist->safe_push (stmt);
ebfd146a
IR
245}
246
247
b28ead45
AH
248/* Function is_simple_and_all_uses_invariant
249
250 Return true if STMT is simple and all uses of it are invariant. */
251
252bool
253is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
254{
255 tree op;
256 gimple *def_stmt;
257 ssa_op_iter iter;
258
259 if (!is_gimple_assign (stmt))
260 return false;
261
262 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
263 {
264 enum vect_def_type dt = vect_uninitialized_def;
265
266 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
267 {
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 "use not simple.\n");
271 return false;
272 }
273
274 if (dt != vect_external_def && dt != vect_constant_def)
275 return false;
276 }
277 return true;
278}
279
ebfd146a
IR
280/* Function vect_stmt_relevant_p.
281
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
284
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - control stmts in the loop (except for the exit condition).
289
290 CHECKME: what other side effects would the vectorizer allow? */
291
292static bool
355fe088 293vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
ebfd146a
IR
294 enum vect_relevant *relevant, bool *live_p)
295{
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 ssa_op_iter op_iter;
298 imm_use_iterator imm_iter;
299 use_operand_p use_p;
300 def_operand_p def_p;
301
8644a673 302 *relevant = vect_unused_in_scope;
ebfd146a
IR
303 *live_p = false;
304
305 /* cond stmt other than loop exit cond. */
b8698a0f
L
306 if (is_ctrl_stmt (stmt)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308 != loop_exit_ctrl_vec_info_type)
8644a673 309 *relevant = vect_used_in_scope;
ebfd146a
IR
310
311 /* changing memory. */
312 if (gimple_code (stmt) != GIMPLE_PHI)
ac6aeab4
RB
313 if (gimple_vdef (stmt)
314 && !gimple_clobber_p (stmt))
ebfd146a 315 {
73fbfcad 316 if (dump_enabled_p ())
78c60e3d 317 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 318 "vec_stmt_relevant_p: stmt has vdefs.\n");
8644a673 319 *relevant = vect_used_in_scope;
ebfd146a
IR
320 }
321
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
324 {
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 {
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
329 {
73fbfcad 330 if (dump_enabled_p ())
78c60e3d 331 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 332 "vec_stmt_relevant_p: used out of loop.\n");
ebfd146a 333
3157b0c2
AO
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
336
ebfd146a
IR
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
341
342 *live_p = true;
343 }
344 }
345 }
346
3a2edf4c
AH
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
b28ead45
AH
349 {
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
354 }
355
ebfd146a
IR
356 return (*live_p || *relevant);
357}
358
359
b8698a0f 360/* Function exist_non_indexing_operands_for_use_p
ebfd146a 361
ff802fa1 362 USE is one of the uses attached to STMT. Check if USE is
ebfd146a
IR
363 used in STMT for anything other than indexing an array. */
364
365static bool
355fe088 366exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
ebfd146a
IR
367{
368 tree operand;
369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
59a05b0c 370
ff802fa1 371 /* USE corresponds to some operand in STMT. If there is no data
ebfd146a
IR
372 reference in STMT, then any operand that corresponds to USE
373 is not indexing an array. */
374 if (!STMT_VINFO_DATA_REF (stmt_info))
375 return true;
59a05b0c 376
ebfd146a
IR
377 /* STMT has a data_ref. FORNOW this means that its of one of
378 the following forms:
379 -1- ARRAY_REF = var
380 -2- var = ARRAY_REF
381 (This should have been verified in analyze_data_refs).
382
383 'var' in the second case corresponds to a def, not a use,
b8698a0f 384 so USE cannot correspond to any operands that are not used
ebfd146a
IR
385 for array indexing.
386
387 Therefore, all we need to check is if STMT falls into the
388 first case, and whether var corresponds to USE. */
ebfd146a
IR
389
390 if (!gimple_assign_copy_p (stmt))
5ce9450f
JJ
391 {
392 if (is_gimple_call (stmt)
393 && gimple_call_internal_p (stmt))
394 switch (gimple_call_internal_fn (stmt))
395 {
396 case IFN_MASK_STORE:
397 operand = gimple_call_arg (stmt, 3);
398 if (operand == use)
399 return true;
400 /* FALLTHRU */
401 case IFN_MASK_LOAD:
402 operand = gimple_call_arg (stmt, 2);
403 if (operand == use)
404 return true;
405 break;
406 default:
407 break;
408 }
409 return false;
410 }
411
59a05b0c
EB
412 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
413 return false;
ebfd146a 414 operand = gimple_assign_rhs1 (stmt);
ebfd146a
IR
415 if (TREE_CODE (operand) != SSA_NAME)
416 return false;
417
418 if (operand == use)
419 return true;
420
421 return false;
422}
423
424
b8698a0f 425/*
ebfd146a
IR
426 Function process_use.
427
428 Inputs:
429 - a USE in STMT in a loop represented by LOOP_VINFO
b28ead45 430 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
ff802fa1 431 that defined USE. This is done by calling mark_relevant and passing it
ebfd146a 432 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
aec7ae7d
JJ
433 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
434 be performed.
ebfd146a
IR
435
436 Outputs:
437 Generally, LIVE_P and RELEVANT are used to define the liveness and
438 relevance info of the DEF_STMT of this USE:
439 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
440 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
441 Exceptions:
442 - case 1: If USE is used only for address computations (e.g. array indexing),
b8698a0f 443 which does not need to be directly vectorized, then the liveness/relevance
ebfd146a 444 of the respective DEF_STMT is left unchanged.
b8698a0f
L
445 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
446 skip DEF_STMT cause it had already been processed.
ebfd146a
IR
447 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
448 be modified accordingly.
449
450 Return true if everything is as expected. Return false otherwise. */
451
452static bool
b28ead45 453process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
355fe088 454 enum vect_relevant relevant, vec<gimple *> *worklist,
aec7ae7d 455 bool force)
ebfd146a
IR
456{
457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
459 stmt_vec_info dstmt_vinfo;
460 basic_block bb, def_bb;
355fe088 461 gimple *def_stmt;
ebfd146a
IR
462 enum vect_def_type dt;
463
b8698a0f 464 /* case 1: we are only interested in uses that need to be vectorized. Uses
ebfd146a 465 that are used for address computation are not considered relevant. */
aec7ae7d 466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
ebfd146a
IR
467 return true;
468
81c40241 469 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
b8698a0f 470 {
73fbfcad 471 if (dump_enabled_p ())
78c60e3d 472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 473 "not vectorized: unsupported use in stmt.\n");
ebfd146a
IR
474 return false;
475 }
476
477 if (!def_stmt || gimple_nop_p (def_stmt))
478 return true;
479
480 def_bb = gimple_bb (def_stmt);
481 if (!flow_bb_inside_loop_p (loop, def_bb))
482 {
73fbfcad 483 if (dump_enabled_p ())
e645e942 484 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
ebfd146a
IR
485 return true;
486 }
487
b8698a0f
L
488 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
489 DEF_STMT must have already been processed, because this should be the
490 only way that STMT, which is a reduction-phi, was put in the worklist,
491 as there should be no other uses for DEF_STMT in the loop. So we just
ebfd146a
IR
492 check that everything is as expected, and we are done. */
493 dstmt_vinfo = vinfo_for_stmt (def_stmt);
494 bb = gimple_bb (stmt);
495 if (gimple_code (stmt) == GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
497 && gimple_code (def_stmt) != GIMPLE_PHI
498 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
499 && bb->loop_father == def_bb->loop_father)
500 {
73fbfcad 501 if (dump_enabled_p ())
78c60e3d 502 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 503 "reduc-stmt defining reduc-phi in the same nest.\n");
ebfd146a
IR
504 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
505 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
506 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
b8698a0f 507 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
8644a673 508 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
ebfd146a
IR
509 return true;
510 }
511
512 /* case 3a: outer-loop stmt defining an inner-loop stmt:
513 outer-loop-header-bb:
514 d = def_stmt
515 inner-loop:
516 stmt # use (d)
517 outer-loop-tail-bb:
518 ... */
519 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
520 {
73fbfcad 521 if (dump_enabled_p ())
78c60e3d 522 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 523 "outer-loop def-stmt defining inner-loop stmt.\n");
7c5222ff 524
ebfd146a
IR
525 switch (relevant)
526 {
8644a673 527 case vect_unused_in_scope:
7c5222ff
IR
528 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
529 vect_used_in_scope : vect_unused_in_scope;
ebfd146a 530 break;
7c5222ff 531
ebfd146a 532 case vect_used_in_outer_by_reduction:
7c5222ff 533 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
ebfd146a
IR
534 relevant = vect_used_by_reduction;
535 break;
7c5222ff 536
ebfd146a 537 case vect_used_in_outer:
7c5222ff 538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
8644a673 539 relevant = vect_used_in_scope;
ebfd146a 540 break;
7c5222ff 541
8644a673 542 case vect_used_in_scope:
ebfd146a
IR
543 break;
544
545 default:
546 gcc_unreachable ();
b8698a0f 547 }
ebfd146a
IR
548 }
549
550 /* case 3b: inner-loop stmt defining an outer-loop stmt:
551 outer-loop-header-bb:
552 ...
553 inner-loop:
554 d = def_stmt
06066f92 555 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
ebfd146a
IR
556 stmt # use (d) */
557 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
558 {
73fbfcad 559 if (dump_enabled_p ())
78c60e3d 560 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 561 "inner-loop def-stmt defining outer-loop stmt.\n");
7c5222ff 562
ebfd146a
IR
563 switch (relevant)
564 {
8644a673 565 case vect_unused_in_scope:
b8698a0f 566 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
06066f92 567 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
a70d6342 568 vect_used_in_outer_by_reduction : vect_unused_in_scope;
ebfd146a
IR
569 break;
570
ebfd146a 571 case vect_used_by_reduction:
b28ead45 572 case vect_used_only_live:
ebfd146a
IR
573 relevant = vect_used_in_outer_by_reduction;
574 break;
575
8644a673 576 case vect_used_in_scope:
ebfd146a
IR
577 relevant = vect_used_in_outer;
578 break;
579
580 default:
581 gcc_unreachable ();
582 }
583 }
643a9684
RB
584 /* We are also not interested in uses on loop PHI backedges that are
585 inductions. Otherwise we'll needlessly vectorize the IV increment
e294f495
RB
586 and cause hybrid SLP for SLP inductions. Unless the PHI is live
587 of course. */
643a9684
RB
588 else if (gimple_code (stmt) == GIMPLE_PHI
589 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
e294f495 590 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
643a9684
RB
591 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
592 == use))
593 {
594 if (dump_enabled_p ())
595 dump_printf_loc (MSG_NOTE, vect_location,
596 "induction value on backedge.\n");
597 return true;
598 }
599
ebfd146a 600
b28ead45 601 vect_mark_relevant (worklist, def_stmt, relevant, false);
ebfd146a
IR
602 return true;
603}
604
605
606/* Function vect_mark_stmts_to_be_vectorized.
607
608 Not all stmts in the loop need to be vectorized. For example:
609
610 for i...
611 for j...
612 1. T0 = i + j
613 2. T1 = a[T0]
614
615 3. j = j + 1
616
617 Stmt 1 and 3 do not need to be vectorized, because loop control and
618 addressing of vectorized data-refs are handled differently.
619
620 This pass detects such stmts. */
621
622bool
623vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
624{
ebfd146a
IR
625 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
626 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
627 unsigned int nbbs = loop->num_nodes;
628 gimple_stmt_iterator si;
355fe088 629 gimple *stmt;
ebfd146a
IR
630 unsigned int i;
631 stmt_vec_info stmt_vinfo;
632 basic_block bb;
355fe088 633 gimple *phi;
ebfd146a 634 bool live_p;
b28ead45 635 enum vect_relevant relevant;
ebfd146a 636
73fbfcad 637 if (dump_enabled_p ())
78c60e3d 638 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 639 "=== vect_mark_stmts_to_be_vectorized ===\n");
ebfd146a 640
355fe088 641 auto_vec<gimple *, 64> worklist;
ebfd146a
IR
642
643 /* 1. Init worklist. */
644 for (i = 0; i < nbbs; i++)
645 {
646 bb = bbs[i];
647 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
b8698a0f 648 {
ebfd146a 649 phi = gsi_stmt (si);
73fbfcad 650 if (dump_enabled_p ())
ebfd146a 651 {
78c60e3d
SS
652 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
654 }
655
656 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
97ecdb46 657 vect_mark_relevant (&worklist, phi, relevant, live_p);
ebfd146a
IR
658 }
659 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
660 {
661 stmt = gsi_stmt (si);
73fbfcad 662 if (dump_enabled_p ())
ebfd146a 663 {
78c60e3d
SS
664 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 666 }
ebfd146a
IR
667
668 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
97ecdb46 669 vect_mark_relevant (&worklist, stmt, relevant, live_p);
ebfd146a
IR
670 }
671 }
672
673 /* 2. Process_worklist */
9771b263 674 while (worklist.length () > 0)
ebfd146a
IR
675 {
676 use_operand_p use_p;
677 ssa_op_iter iter;
678
9771b263 679 stmt = worklist.pop ();
73fbfcad 680 if (dump_enabled_p ())
ebfd146a 681 {
78c60e3d
SS
682 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
683 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
684 }
685
b8698a0f 686 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
b28ead45
AH
687 (DEF_STMT) as relevant/irrelevant according to the relevance property
688 of STMT. */
ebfd146a
IR
689 stmt_vinfo = vinfo_for_stmt (stmt);
690 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
ebfd146a 691
b28ead45
AH
692 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
693 propagated as is to the DEF_STMTs of its USEs.
ebfd146a
IR
694
695 One exception is when STMT has been identified as defining a reduction
b28ead45 696 variable; in this case we set the relevance to vect_used_by_reduction.
ebfd146a 697 This is because we distinguish between two kinds of relevant stmts -
b8698a0f 698 those that are used by a reduction computation, and those that are
ff802fa1 699 (also) used by a regular computation. This allows us later on to
b8698a0f 700 identify stmts that are used solely by a reduction, and therefore the
7c5222ff 701 order of the results that they produce does not have to be kept. */
ebfd146a 702
b28ead45 703 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
ebfd146a 704 {
06066f92 705 case vect_reduction_def:
b28ead45
AH
706 gcc_assert (relevant != vect_unused_in_scope);
707 if (relevant != vect_unused_in_scope
708 && relevant != vect_used_in_scope
709 && relevant != vect_used_by_reduction
710 && relevant != vect_used_only_live)
06066f92 711 {
b28ead45
AH
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
714 "unsupported use of reduction.\n");
715 return false;
06066f92 716 }
06066f92 717 break;
b8698a0f 718
06066f92 719 case vect_nested_cycle:
b28ead45
AH
720 if (relevant != vect_unused_in_scope
721 && relevant != vect_used_in_outer_by_reduction
722 && relevant != vect_used_in_outer)
06066f92 723 {
73fbfcad 724 if (dump_enabled_p ())
78c60e3d 725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 726 "unsupported use of nested cycle.\n");
7c5222ff 727
06066f92
IR
728 return false;
729 }
b8698a0f
L
730 break;
731
06066f92 732 case vect_double_reduction_def:
b28ead45
AH
733 if (relevant != vect_unused_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
06066f92 736 {
73fbfcad 737 if (dump_enabled_p ())
78c60e3d 738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 739 "unsupported use of double reduction.\n");
7c5222ff 740
7c5222ff 741 return false;
06066f92 742 }
b8698a0f 743 break;
7c5222ff 744
06066f92
IR
745 default:
746 break;
7c5222ff 747 }
b8698a0f 748
aec7ae7d 749 if (is_pattern_stmt_p (stmt_vinfo))
9d5e7640
IR
750 {
751 /* Pattern statements are not inserted into the code, so
752 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
753 have to scan the RHS or function arguments instead. */
754 if (is_gimple_assign (stmt))
755 {
69d2aade
JJ
756 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
757 tree op = gimple_assign_rhs1 (stmt);
758
759 i = 1;
760 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
761 {
762 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
b28ead45 763 relevant, &worklist, false)
69d2aade 764 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
b28ead45 765 relevant, &worklist, false))
566d377a 766 return false;
69d2aade
JJ
767 i = 2;
768 }
769 for (; i < gimple_num_ops (stmt); i++)
9d5e7640 770 {
69d2aade 771 op = gimple_op (stmt, i);
afbe6325 772 if (TREE_CODE (op) == SSA_NAME
b28ead45 773 && !process_use (stmt, op, loop_vinfo, relevant,
afbe6325 774 &worklist, false))
07687835 775 return false;
9d5e7640
IR
776 }
777 }
778 else if (is_gimple_call (stmt))
779 {
780 for (i = 0; i < gimple_call_num_args (stmt); i++)
781 {
782 tree arg = gimple_call_arg (stmt, i);
b28ead45 783 if (!process_use (stmt, arg, loop_vinfo, relevant,
aec7ae7d 784 &worklist, false))
07687835 785 return false;
9d5e7640
IR
786 }
787 }
788 }
789 else
790 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
791 {
792 tree op = USE_FROM_PTR (use_p);
b28ead45 793 if (!process_use (stmt, op, loop_vinfo, relevant,
aec7ae7d 794 &worklist, false))
07687835 795 return false;
9d5e7640 796 }
aec7ae7d 797
3bab6342 798 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
aec7ae7d 799 {
134c85ca
RS
800 gather_scatter_info gs_info;
801 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
802 gcc_unreachable ();
803 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
804 &worklist, true))
566d377a 805 return false;
aec7ae7d 806 }
ebfd146a
IR
807 } /* while worklist */
808
ebfd146a
IR
809 return true;
810}
811
812
b8698a0f 813/* Function vect_model_simple_cost.
ebfd146a 814
b8698a0f 815 Models cost for simple operations, i.e. those that only emit ncopies of a
ebfd146a
IR
816 single op. Right now, this does not account for multiple insns that could
817 be generated for the single vector op. We will handle that shortly. */
818
819void
b8698a0f 820vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
92345349 821 enum vect_def_type *dt,
4fc5ebf1 822 int ndts,
92345349
BS
823 stmt_vector_for_cost *prologue_cost_vec,
824 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
825{
826 int i;
92345349 827 int inside_cost = 0, prologue_cost = 0;
ebfd146a
IR
828
829 /* The SLP costs were already calculated during SLP tree build. */
830 if (PURE_SLP_STMT (stmt_info))
831 return;
832
4fc5ebf1
JG
833 /* Cost the "broadcast" of a scalar operand in to a vector operand.
834 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
835 cost model. */
836 for (i = 0; i < ndts; i++)
92345349 837 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
4fc5ebf1 838 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
92345349 839 stmt_info, 0, vect_prologue);
c3e7ee41
BS
840
841 /* Pass the inside-of-loop statements to the target-specific cost model. */
92345349
BS
842 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
843 stmt_info, 0, vect_body);
c3e7ee41 844
73fbfcad 845 if (dump_enabled_p ())
78c60e3d
SS
846 dump_printf_loc (MSG_NOTE, vect_location,
847 "vect_model_simple_cost: inside_cost = %d, "
e645e942 848 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
849}
850
851
8bd37302
BS
852/* Model cost for type demotion and promotion operations. PWR is normally
853 zero for single-step promotions and demotions. It will be one if
854 two-step promotion/demotion is required, and so on. Each additional
855 step doubles the number of instructions required. */
856
857static void
858vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
859 enum vect_def_type *dt, int pwr)
860{
861 int i, tmp;
92345349 862 int inside_cost = 0, prologue_cost = 0;
c3e7ee41
BS
863 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
864 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
865 void *target_cost_data;
8bd37302
BS
866
867 /* The SLP costs were already calculated during SLP tree build. */
868 if (PURE_SLP_STMT (stmt_info))
869 return;
870
c3e7ee41
BS
871 if (loop_vinfo)
872 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
873 else
874 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
875
8bd37302
BS
876 for (i = 0; i < pwr + 1; i++)
877 {
878 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
879 (i + 1) : i;
c3e7ee41 880 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
92345349
BS
881 vec_promote_demote, stmt_info, 0,
882 vect_body);
8bd37302
BS
883 }
884
885 /* FORNOW: Assuming maximum 2 args per stmts. */
886 for (i = 0; i < 2; i++)
92345349
BS
887 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
888 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
889 stmt_info, 0, vect_prologue);
8bd37302 890
73fbfcad 891 if (dump_enabled_p ())
78c60e3d
SS
892 dump_printf_loc (MSG_NOTE, vect_location,
893 "vect_model_promotion_demotion_cost: inside_cost = %d, "
e645e942 894 "prologue_cost = %d .\n", inside_cost, prologue_cost);
8bd37302
BS
895}
896
ebfd146a
IR
897/* Function vect_model_store_cost
898
0d0293ac
MM
899 Models cost for stores. In the case of grouped accesses, one access
900 has the overhead of the grouped access attributed to it. */
ebfd146a
IR
901
902void
b8698a0f 903vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
2de001ee 904 vect_memory_access_type memory_access_type,
9ce4345a 905 vec_load_store_type vls_type, slp_tree slp_node,
92345349
BS
906 stmt_vector_for_cost *prologue_cost_vec,
907 stmt_vector_for_cost *body_cost_vec)
ebfd146a 908{
92345349 909 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f
RS
910 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
911 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
912 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 913
9ce4345a 914 if (vls_type == VLS_STORE_INVARIANT)
92345349
BS
915 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
916 stmt_info, 0, vect_prologue);
ebfd146a 917
892a981f
RS
918 /* Grouped stores update all elements in the group at once,
919 so we want the DR for the first statement. */
920 if (!slp_node && grouped_access_p)
720f5239 921 {
892a981f
RS
922 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
923 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
720f5239 924 }
ebfd146a 925
892a981f
RS
926 /* True if we should include any once-per-group costs as well as
927 the cost of the statement itself. For SLP we only get called
928 once per group anyhow. */
929 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
930
272c6793 931 /* We assume that the cost of a single store-lanes instruction is
0d0293ac 932 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
272c6793 933 access is instead being provided by a permute-and-store operation,
2de001ee
RS
934 include the cost of the permutes. */
935 if (first_stmt_p
936 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 937 {
e1377713
ES
938 /* Uses a high and low interleave or shuffle operations for each
939 needed permute. */
892a981f 940 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
e1377713 941 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
92345349
BS
942 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
943 stmt_info, 0, vect_body);
ebfd146a 944
73fbfcad 945 if (dump_enabled_p ())
78c60e3d 946 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 947 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 948 group_size);
ebfd146a
IR
949 }
950
cee62fee 951 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 952 /* Costs of the stores. */
067bc855
RB
953 if (memory_access_type == VMAT_ELEMENTWISE
954 || memory_access_type == VMAT_GATHER_SCATTER)
c5126ce8
RS
955 {
956 /* N scalar stores plus extracting the elements. */
957 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
958 inside_cost += record_stmt_cost (body_cost_vec,
959 ncopies * assumed_nunits,
960 scalar_store, stmt_info, 0, vect_body);
961 }
f2e2a985 962 else
892a981f 963 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
ebfd146a 964
2de001ee
RS
965 if (memory_access_type == VMAT_ELEMENTWISE
966 || memory_access_type == VMAT_STRIDED_SLP)
c5126ce8
RS
967 {
968 /* N scalar stores plus extracting the elements. */
969 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
970 inside_cost += record_stmt_cost (body_cost_vec,
971 ncopies * assumed_nunits,
972 vec_to_scalar, stmt_info, 0, vect_body);
973 }
cee62fee 974
73fbfcad 975 if (dump_enabled_p ())
78c60e3d
SS
976 dump_printf_loc (MSG_NOTE, vect_location,
977 "vect_model_store_cost: inside_cost = %d, "
e645e942 978 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
979}
980
981
720f5239
IR
982/* Calculate cost of DR's memory access. */
983void
984vect_get_store_cost (struct data_reference *dr, int ncopies,
c3e7ee41 985 unsigned int *inside_cost,
92345349 986 stmt_vector_for_cost *body_cost_vec)
720f5239
IR
987{
988 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
355fe088 989 gimple *stmt = DR_STMT (dr);
c3e7ee41 990 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
991
992 switch (alignment_support_scheme)
993 {
994 case dr_aligned:
995 {
92345349
BS
996 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
997 vector_store, stmt_info, 0,
998 vect_body);
720f5239 999
73fbfcad 1000 if (dump_enabled_p ())
78c60e3d 1001 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1002 "vect_model_store_cost: aligned.\n");
720f5239
IR
1003 break;
1004 }
1005
1006 case dr_unaligned_supported:
1007 {
720f5239 1008 /* Here, we assign an additional cost for the unaligned store. */
92345349 1009 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1010 unaligned_store, stmt_info,
92345349 1011 DR_MISALIGNMENT (dr), vect_body);
73fbfcad 1012 if (dump_enabled_p ())
78c60e3d
SS
1013 dump_printf_loc (MSG_NOTE, vect_location,
1014 "vect_model_store_cost: unaligned supported by "
e645e942 1015 "hardware.\n");
720f5239
IR
1016 break;
1017 }
1018
38eec4c6
UW
1019 case dr_unaligned_unsupported:
1020 {
1021 *inside_cost = VECT_MAX_COST;
1022
73fbfcad 1023 if (dump_enabled_p ())
78c60e3d 1024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1025 "vect_model_store_cost: unsupported access.\n");
38eec4c6
UW
1026 break;
1027 }
1028
720f5239
IR
1029 default:
1030 gcc_unreachable ();
1031 }
1032}
1033
1034
ebfd146a
IR
1035/* Function vect_model_load_cost
1036
892a981f
RS
1037 Models cost for loads. In the case of grouped accesses, one access has
1038 the overhead of the grouped access attributed to it. Since unaligned
b8698a0f 1039 accesses are supported for loads, we also account for the costs of the
ebfd146a
IR
1040 access scheme chosen. */
1041
1042void
92345349 1043vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
2de001ee
RS
1044 vect_memory_access_type memory_access_type,
1045 slp_tree slp_node,
92345349
BS
1046 stmt_vector_for_cost *prologue_cost_vec,
1047 stmt_vector_for_cost *body_cost_vec)
ebfd146a 1048{
892a981f
RS
1049 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1050 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
92345349 1051 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f 1052 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 1053
892a981f
RS
1054 /* Grouped loads read all elements in the group at once,
1055 so we want the DR for the first statement. */
1056 if (!slp_node && grouped_access_p)
ebfd146a 1057 {
892a981f
RS
1058 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1059 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
ebfd146a
IR
1060 }
1061
892a981f
RS
1062 /* True if we should include any once-per-group costs as well as
1063 the cost of the statement itself. For SLP we only get called
1064 once per group anyhow. */
1065 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1066
272c6793 1067 /* We assume that the cost of a single load-lanes instruction is
0d0293ac 1068 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
272c6793 1069 access is instead being provided by a load-and-permute operation,
2de001ee
RS
1070 include the cost of the permutes. */
1071 if (first_stmt_p
1072 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 1073 {
2c23db6d
ES
1074 /* Uses an even and odd extract operations or shuffle operations
1075 for each needed permute. */
892a981f 1076 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2c23db6d
ES
1077 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1078 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1079 stmt_info, 0, vect_body);
ebfd146a 1080
73fbfcad 1081 if (dump_enabled_p ())
e645e942
TJ
1082 dump_printf_loc (MSG_NOTE, vect_location,
1083 "vect_model_load_cost: strided group_size = %d .\n",
78c60e3d 1084 group_size);
ebfd146a
IR
1085 }
1086
1087 /* The loads themselves. */
067bc855
RB
1088 if (memory_access_type == VMAT_ELEMENTWISE
1089 || memory_access_type == VMAT_GATHER_SCATTER)
a82960aa 1090 {
a21892ad
BS
1091 /* N scalar loads plus gathering them into a vector. */
1092 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
c5126ce8 1093 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
92345349 1094 inside_cost += record_stmt_cost (body_cost_vec,
c5126ce8 1095 ncopies * assumed_nunits,
92345349 1096 scalar_load, stmt_info, 0, vect_body);
a82960aa
RG
1097 }
1098 else
892a981f 1099 vect_get_load_cost (dr, ncopies, first_stmt_p,
92345349
BS
1100 &inside_cost, &prologue_cost,
1101 prologue_cost_vec, body_cost_vec, true);
2de001ee
RS
1102 if (memory_access_type == VMAT_ELEMENTWISE
1103 || memory_access_type == VMAT_STRIDED_SLP)
892a981f
RS
1104 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1105 stmt_info, 0, vect_body);
720f5239 1106
73fbfcad 1107 if (dump_enabled_p ())
78c60e3d
SS
1108 dump_printf_loc (MSG_NOTE, vect_location,
1109 "vect_model_load_cost: inside_cost = %d, "
e645e942 1110 "prologue_cost = %d .\n", inside_cost, prologue_cost);
720f5239
IR
1111}
1112
1113
1114/* Calculate cost of DR's memory access. */
1115void
1116vect_get_load_cost (struct data_reference *dr, int ncopies,
c3e7ee41 1117 bool add_realign_cost, unsigned int *inside_cost,
92345349
BS
1118 unsigned int *prologue_cost,
1119 stmt_vector_for_cost *prologue_cost_vec,
1120 stmt_vector_for_cost *body_cost_vec,
1121 bool record_prologue_costs)
720f5239
IR
1122{
1123 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
355fe088 1124 gimple *stmt = DR_STMT (dr);
c3e7ee41 1125 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
1126
1127 switch (alignment_support_scheme)
ebfd146a
IR
1128 {
1129 case dr_aligned:
1130 {
92345349
BS
1131 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1132 stmt_info, 0, vect_body);
ebfd146a 1133
73fbfcad 1134 if (dump_enabled_p ())
78c60e3d 1135 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1136 "vect_model_load_cost: aligned.\n");
ebfd146a
IR
1137
1138 break;
1139 }
1140 case dr_unaligned_supported:
1141 {
720f5239 1142 /* Here, we assign an additional cost for the unaligned load. */
92345349 1143 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1144 unaligned_load, stmt_info,
92345349 1145 DR_MISALIGNMENT (dr), vect_body);
c3e7ee41 1146
73fbfcad 1147 if (dump_enabled_p ())
78c60e3d
SS
1148 dump_printf_loc (MSG_NOTE, vect_location,
1149 "vect_model_load_cost: unaligned supported by "
e645e942 1150 "hardware.\n");
ebfd146a
IR
1151
1152 break;
1153 }
1154 case dr_explicit_realign:
1155 {
92345349
BS
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1157 vector_load, stmt_info, 0, vect_body);
1158 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1159 vec_perm, stmt_info, 0, vect_body);
ebfd146a
IR
1160
1161 /* FIXME: If the misalignment remains fixed across the iterations of
1162 the containing loop, the following cost should be added to the
92345349 1163 prologue costs. */
ebfd146a 1164 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1165 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1166 stmt_info, 0, vect_body);
ebfd146a 1167
73fbfcad 1168 if (dump_enabled_p ())
e645e942
TJ
1169 dump_printf_loc (MSG_NOTE, vect_location,
1170 "vect_model_load_cost: explicit realign\n");
8bd37302 1171
ebfd146a
IR
1172 break;
1173 }
1174 case dr_explicit_realign_optimized:
1175 {
73fbfcad 1176 if (dump_enabled_p ())
e645e942 1177 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1178 "vect_model_load_cost: unaligned software "
e645e942 1179 "pipelined.\n");
ebfd146a
IR
1180
1181 /* Unaligned software pipeline has a load of an address, an initial
ff802fa1 1182 load, and possibly a mask operation to "prime" the loop. However,
0d0293ac 1183 if this is an access in a group of loads, which provide grouped
ebfd146a 1184 access, then the above cost should only be considered for one
ff802fa1 1185 access in the group. Inside the loop, there is a load op
ebfd146a
IR
1186 and a realignment op. */
1187
92345349 1188 if (add_realign_cost && record_prologue_costs)
ebfd146a 1189 {
92345349
BS
1190 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1191 vector_stmt, stmt_info,
1192 0, vect_prologue);
ebfd146a 1193 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1194 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1195 vector_stmt, stmt_info,
1196 0, vect_prologue);
ebfd146a
IR
1197 }
1198
92345349
BS
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1200 stmt_info, 0, vect_body);
1201 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1202 stmt_info, 0, vect_body);
8bd37302 1203
73fbfcad 1204 if (dump_enabled_p ())
78c60e3d 1205 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
1206 "vect_model_load_cost: explicit realign optimized"
1207 "\n");
8bd37302 1208
ebfd146a
IR
1209 break;
1210 }
1211
38eec4c6
UW
1212 case dr_unaligned_unsupported:
1213 {
1214 *inside_cost = VECT_MAX_COST;
1215
73fbfcad 1216 if (dump_enabled_p ())
78c60e3d 1217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1218 "vect_model_load_cost: unsupported access.\n");
38eec4c6
UW
1219 break;
1220 }
1221
ebfd146a
IR
1222 default:
1223 gcc_unreachable ();
1224 }
ebfd146a
IR
1225}
1226
418b7df3
RG
1227/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1228 the loop preheader for the vectorized stmt STMT. */
ebfd146a 1229
418b7df3 1230static void
355fe088 1231vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
ebfd146a 1232{
ebfd146a 1233 if (gsi)
418b7df3 1234 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1235 else
1236 {
418b7df3 1237 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a 1238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
b8698a0f 1239
a70d6342
IR
1240 if (loop_vinfo)
1241 {
1242 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
418b7df3
RG
1243 basic_block new_bb;
1244 edge pe;
a70d6342
IR
1245
1246 if (nested_in_vect_loop_p (loop, stmt))
1247 loop = loop->inner;
b8698a0f 1248
a70d6342 1249 pe = loop_preheader_edge (loop);
418b7df3 1250 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
a70d6342
IR
1251 gcc_assert (!new_bb);
1252 }
1253 else
1254 {
1255 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1256 basic_block bb;
1257 gimple_stmt_iterator gsi_bb_start;
1258
1259 gcc_assert (bb_vinfo);
1260 bb = BB_VINFO_BB (bb_vinfo);
12aaf609 1261 gsi_bb_start = gsi_after_labels (bb);
418b7df3 1262 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
a70d6342 1263 }
ebfd146a
IR
1264 }
1265
73fbfcad 1266 if (dump_enabled_p ())
ebfd146a 1267 {
78c60e3d
SS
1268 dump_printf_loc (MSG_NOTE, vect_location,
1269 "created new init_stmt: ");
1270 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
ebfd146a 1271 }
418b7df3
RG
1272}
1273
1274/* Function vect_init_vector.
ebfd146a 1275
5467ee52
RG
1276 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1277 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1278 vector type a vector with all elements equal to VAL is created first.
1279 Place the initialization at BSI if it is not NULL. Otherwise, place the
1280 initialization at the loop preheader.
418b7df3
RG
1281 Return the DEF of INIT_STMT.
1282 It will be used in the vectorization of STMT. */
1283
1284tree
355fe088 1285vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
418b7df3 1286{
355fe088 1287 gimple *init_stmt;
418b7df3
RG
1288 tree new_temp;
1289
e412ece4
RB
1290 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1291 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
418b7df3 1292 {
e412ece4
RB
1293 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1294 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
418b7df3 1295 {
5a308cf1
IE
1296 /* Scalar boolean value should be transformed into
1297 all zeros or all ones value before building a vector. */
1298 if (VECTOR_BOOLEAN_TYPE_P (type))
1299 {
b3d51f23
IE
1300 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1301 tree false_val = build_zero_cst (TREE_TYPE (type));
5a308cf1
IE
1302
1303 if (CONSTANT_CLASS_P (val))
1304 val = integer_zerop (val) ? false_val : true_val;
1305 else
1306 {
1307 new_temp = make_ssa_name (TREE_TYPE (type));
1308 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1309 val, true_val, false_val);
1310 vect_init_vector_1 (stmt, init_stmt, gsi);
1311 val = new_temp;
1312 }
1313 }
1314 else if (CONSTANT_CLASS_P (val))
42fd8198 1315 val = fold_convert (TREE_TYPE (type), val);
418b7df3
RG
1316 else
1317 {
b731b390 1318 new_temp = make_ssa_name (TREE_TYPE (type));
e412ece4
RB
1319 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1320 init_stmt = gimple_build_assign (new_temp,
1321 fold_build1 (VIEW_CONVERT_EXPR,
1322 TREE_TYPE (type),
1323 val));
1324 else
1325 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
418b7df3 1326 vect_init_vector_1 (stmt, init_stmt, gsi);
5467ee52 1327 val = new_temp;
418b7df3
RG
1328 }
1329 }
5467ee52 1330 val = build_vector_from_val (type, val);
418b7df3
RG
1331 }
1332
0e22bb5a
RB
1333 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1334 init_stmt = gimple_build_assign (new_temp, val);
418b7df3 1335 vect_init_vector_1 (stmt, init_stmt, gsi);
0e22bb5a 1336 return new_temp;
ebfd146a
IR
1337}
1338
c83a894c 1339/* Function vect_get_vec_def_for_operand_1.
a70d6342 1340
c83a894c
AH
1341 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1342 DT that will be used in the vectorized stmt. */
ebfd146a
IR
1343
1344tree
c83a894c 1345vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
ebfd146a
IR
1346{
1347 tree vec_oprnd;
355fe088 1348 gimple *vec_stmt;
ebfd146a 1349 stmt_vec_info def_stmt_info = NULL;
ebfd146a
IR
1350
1351 switch (dt)
1352 {
81c40241 1353 /* operand is a constant or a loop invariant. */
ebfd146a 1354 case vect_constant_def:
81c40241 1355 case vect_external_def:
c83a894c
AH
1356 /* Code should use vect_get_vec_def_for_operand. */
1357 gcc_unreachable ();
ebfd146a 1358
81c40241 1359 /* operand is defined inside the loop. */
8644a673 1360 case vect_internal_def:
ebfd146a 1361 {
ebfd146a
IR
1362 /* Get the def from the vectorized stmt. */
1363 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1364
ebfd146a 1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
83197f37
IR
1366 /* Get vectorized pattern statement. */
1367 if (!vec_stmt
1368 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1369 && !STMT_VINFO_RELEVANT (def_stmt_info))
1370 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1371 STMT_VINFO_RELATED_STMT (def_stmt_info)));
ebfd146a
IR
1372 gcc_assert (vec_stmt);
1373 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1374 vec_oprnd = PHI_RESULT (vec_stmt);
1375 else if (is_gimple_call (vec_stmt))
1376 vec_oprnd = gimple_call_lhs (vec_stmt);
1377 else
1378 vec_oprnd = gimple_assign_lhs (vec_stmt);
1379 return vec_oprnd;
1380 }
1381
c78e3652 1382 /* operand is defined by a loop header phi. */
ebfd146a 1383 case vect_reduction_def:
06066f92 1384 case vect_double_reduction_def:
7c5222ff 1385 case vect_nested_cycle:
ebfd146a
IR
1386 case vect_induction_def:
1387 {
1388 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1389
1390 /* Get the def from the vectorized stmt. */
1391 def_stmt_info = vinfo_for_stmt (def_stmt);
1392 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
6dbbece6
RG
1393 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1394 vec_oprnd = PHI_RESULT (vec_stmt);
1395 else
1396 vec_oprnd = gimple_get_lhs (vec_stmt);
ebfd146a
IR
1397 return vec_oprnd;
1398 }
1399
1400 default:
1401 gcc_unreachable ();
1402 }
1403}
1404
1405
c83a894c
AH
1406/* Function vect_get_vec_def_for_operand.
1407
1408 OP is an operand in STMT. This function returns a (vector) def that will be
1409 used in the vectorized stmt for STMT.
1410
1411 In the case that OP is an SSA_NAME which is defined in the loop, then
1412 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1413
1414 In case OP is an invariant or constant, a new stmt that creates a vector def
1415 needs to be introduced. VECTYPE may be used to specify a required type for
1416 vector invariant. */
1417
1418tree
1419vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1420{
1421 gimple *def_stmt;
1422 enum vect_def_type dt;
1423 bool is_simple_use;
1424 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1426
1427 if (dump_enabled_p ())
1428 {
1429 dump_printf_loc (MSG_NOTE, vect_location,
1430 "vect_get_vec_def_for_operand: ");
1431 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1432 dump_printf (MSG_NOTE, "\n");
1433 }
1434
1435 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1436 gcc_assert (is_simple_use);
1437 if (def_stmt && dump_enabled_p ())
1438 {
1439 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1440 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1441 }
1442
1443 if (dt == vect_constant_def || dt == vect_external_def)
1444 {
1445 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1446 tree vector_type;
1447
1448 if (vectype)
1449 vector_type = vectype;
2568d8a1 1450 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1451 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1452 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1453 else
1454 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1455
1456 gcc_assert (vector_type);
1457 return vect_init_vector (stmt, op, vector_type, NULL);
1458 }
1459 else
1460 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1461}
1462
1463
ebfd146a
IR
1464/* Function vect_get_vec_def_for_stmt_copy
1465
ff802fa1 1466 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1467 vectorized stmt to be created (by the caller to this function) is a "copy"
1468 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1469 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1470 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1471 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1472 DT is the type of the vector def VEC_OPRND.
1473
1474 Context:
1475 In case the vectorization factor (VF) is bigger than the number
1476 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1477 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1478 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1479 smallest data-type determines the VF, and as a result, when vectorizing
1480 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1481 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1482 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1483 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1484 which VF=16 and nunits=4, so the number of copies required is 4):
1485
1486 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1487
ebfd146a
IR
1488 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1489 VS1.1: vx.1 = memref1 VS1.2
1490 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1491 VS1.3: vx.3 = memref3
ebfd146a
IR
1492
1493 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1494 VSnew.1: vz1 = vx.1 + ... VSnew.2
1495 VSnew.2: vz2 = vx.2 + ... VSnew.3
1496 VSnew.3: vz3 = vx.3 + ...
1497
1498 The vectorization of S1 is explained in vectorizable_load.
1499 The vectorization of S2:
b8698a0f
L
1500 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1501 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1502 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1503 returns the vector-def 'vx.0'.
1504
b8698a0f
L
1505 To create the remaining copies of the vector-stmt (VSnew.j), this
1506 function is called to get the relevant vector-def for each operand. It is
1507 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1508 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1509
b8698a0f
L
1510 For example, to obtain the vector-def 'vx.1' in order to create the
1511 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1512 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1513 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1514 and return its def ('vx.1').
1515 Overall, to create the above sequence this function will be called 3 times:
1516 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1517 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1518 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1519
1520tree
1521vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1522{
355fe088 1523 gimple *vec_stmt_for_operand;
ebfd146a
IR
1524 stmt_vec_info def_stmt_info;
1525
1526 /* Do nothing; can reuse same def. */
8644a673 1527   if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1528 return vec_oprnd;
1529
1530 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1531 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1532 gcc_assert (def_stmt_info);
1533 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1534 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1535 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1536 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1537 else
1538 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1539 return vec_oprnd;
1540}
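
The STMT_VINFO_RELATED_STMT chaining described above can be pictured with a plain linked list. The following standalone C sketch is illustrative only, not part of tree-vect-stmts.c; the struct and names are hypothetical. It walks such a chain the way vect_get_vec_def_for_stmt_copy does for internal defs: given the def produced by one copy, it returns the def produced by the next copy.

#include <stdio.h>

/* A stand-in for one vectorized copy and its STMT_VINFO_RELATED_STMT link.  */
struct copy_stmt
{
  const char *lhs;             /* vector def produced by this copy */
  struct copy_stmt *related;   /* next copy in the chain */
};

/* Return the def produced by the copy that follows DEF_STMT,
   mirroring the lookup performed for vect_internal_def operands.  */
static const char *
next_copy_def (const struct copy_stmt *def_stmt)
{
  return def_stmt->related->lhs;
}

int
main (void)
{
  struct copy_stmt vs1_3 = { "vx.3", NULL };
  struct copy_stmt vs1_2 = { "vx.2", &vs1_3 };
  struct copy_stmt vs1_1 = { "vx.1", &vs1_2 };
  struct copy_stmt vs1_0 = { "vx.0", &vs1_1 };

  /* VF = 16 and nunits = 4 give 4 copies, hence three chained lookups.  */
  printf ("%s %s %s\n", next_copy_def (&vs1_0), next_copy_def (&vs1_1),
	  next_copy_def (&vs1_2));  /* prints: vx.1 vx.2 vx.3 */
  return 0;
}
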
1541
1542
1543/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1544 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1545
c78e3652 1546void
b8698a0f 1547vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1548 vec<tree> *vec_oprnds0,
1549 vec<tree> *vec_oprnds1)
ebfd146a 1550{
9771b263 1551 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1552
1553 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1554 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1555
9771b263 1556 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1557 {
9771b263 1558 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1560 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1561 }
1562}
1563
1564
c78e3652 1565/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1566
c78e3652 1567void
355fe088 1568vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1569 vec<tree> *vec_oprnds0,
1570 vec<tree> *vec_oprnds1,
306b0c92 1571 slp_tree slp_node)
ebfd146a
IR
1572{
1573 if (slp_node)
d092494c
IR
1574 {
1575 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1576 auto_vec<tree> ops (nops);
1577 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1578
9771b263 1579 ops.quick_push (op0);
d092494c 1580 if (op1)
9771b263 1581 ops.quick_push (op1);
d092494c 1582
306b0c92 1583 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1584
37b5ec8f 1585 *vec_oprnds0 = vec_defs[0];
d092494c 1586 if (op1)
37b5ec8f 1587 *vec_oprnds1 = vec_defs[1];
d092494c 1588 }
ebfd146a
IR
1589 else
1590 {
1591 tree vec_oprnd;
1592
9771b263 1593 vec_oprnds0->create (1);
81c40241 1594 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1595 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1596
1597 if (op1)
1598 {
9771b263 1599 vec_oprnds1->create (1);
81c40241 1600 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1601 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1602 }
1603 }
1604}
1605
1606
1607/* Function vect_finish_stmt_generation.
1608
1609 Insert a new stmt. */
1610
1611void
355fe088 1612vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1613 gimple_stmt_iterator *gsi)
1614{
1615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
310213d4 1616 vec_info *vinfo = stmt_info->vinfo;
ebfd146a
IR
1617
1618 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1619
54e8e2c3
RG
1620 if (!gsi_end_p (*gsi)
1621 && gimple_has_mem_ops (vec_stmt))
1622 {
355fe088 1623 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1624 tree vuse = gimple_vuse (at_stmt);
1625 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1626 {
1627 tree vdef = gimple_vdef (at_stmt);
1628 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1629 /* If we have an SSA vuse and insert a store, update virtual
1630 SSA form to avoid triggering the renamer. Do so only
1631 if we can easily see all uses - which is what almost always
1632 happens with the way vectorized stmts are inserted. */
1633 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1634 && ((is_gimple_assign (vec_stmt)
1635 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1636 || (is_gimple_call (vec_stmt)
1637 && !(gimple_call_flags (vec_stmt)
1638 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1639 {
1640 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1641 gimple_set_vdef (vec_stmt, new_vdef);
1642 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1643 }
1644 }
1645 }
ebfd146a
IR
1646 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1647
310213d4 1648 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
ebfd146a 1649
73fbfcad 1650 if (dump_enabled_p ())
ebfd146a 1651 {
78c60e3d
SS
1652 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
ebfd146a
IR
1654 }
1655
ad885386 1656 gimple_set_location (vec_stmt, gimple_location (stmt));
8e91d222
JJ
1657
1658 /* While EH edges will generally prevent vectorization, stmt might
1659 e.g. be in a must-not-throw region. Ensure newly created stmts
1660 that could throw are part of the same region. */
1661 int lp_nr = lookup_stmt_eh_lp (stmt);
1662 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1663 add_stmt_to_eh_lp (vec_stmt, lp_nr);
ebfd146a
IR
1664}
1665
70439f0d
RS
1666/* We want to vectorize a call to combined function CFN with function
1667 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1668 as the types of all inputs. Check whether this is possible using
1669 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1670
70439f0d
RS
1671static internal_fn
1672vectorizable_internal_function (combined_fn cfn, tree fndecl,
1673 tree vectype_out, tree vectype_in)
ebfd146a 1674{
70439f0d
RS
1675 internal_fn ifn;
1676 if (internal_fn_p (cfn))
1677 ifn = as_internal_fn (cfn);
1678 else
1679 ifn = associated_internal_fn (fndecl);
1680 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1681 {
1682 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1683 if (info.vectorizable)
1684 {
1685 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1686 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1687 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1688 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1689 return ifn;
1690 }
1691 }
1692 return IFN_LAST;
ebfd146a
IR
1693}
1694
5ce9450f 1695
355fe088 1696static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1697 gimple_stmt_iterator *);
1698
7cfb4d93
RS
1699/* Check whether a load or store statement in the loop described by
1700 LOOP_VINFO is possible in a fully-masked loop. This is testing
1701 whether the vectorizer pass has the appropriate support, as well as
1702 whether the target does.
1703
1704 VLS_TYPE says whether the statement is a load or store and VECTYPE
1705 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1706 says how the load or store is going to be implemented and GROUP_SIZE
1707 is the number of load or store statements in the containing group.
1708
1709 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1710 supported, otherwise record the required mask types. */
1711
1712static void
1713check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1714 vec_load_store_type vls_type, int group_size,
1715 vect_memory_access_type memory_access_type)
1716{
1717 /* Invariant loads need no special support. */
1718 if (memory_access_type == VMAT_INVARIANT)
1719 return;
1720
1721 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1722 machine_mode vecmode = TYPE_MODE (vectype);
1723 bool is_load = (vls_type == VLS_LOAD);
1724 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1725 {
1726 if (is_load
1727 ? !vect_load_lanes_supported (vectype, group_size, true)
1728 : !vect_store_lanes_supported (vectype, group_size, true))
1729 {
1730 if (dump_enabled_p ())
1731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1732 "can't use a fully-masked loop because the"
1733 " target doesn't have an appropriate masked"
1734 " load/store-lanes instruction.\n");
1735 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1736 return;
1737 }
1738 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1739 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1740 return;
1741 }
1742
1743 if (memory_access_type != VMAT_CONTIGUOUS
1744 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1745 {
1746 /* Element X of the data must come from iteration i * VF + X of the
1747 scalar loop. We need more work to support other mappings. */
1748 if (dump_enabled_p ())
1749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1750 "can't use a fully-masked loop because an access"
1751 " isn't contiguous.\n");
1752 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1753 return;
1754 }
1755
1756 machine_mode mask_mode;
1757 if (!(targetm.vectorize.get_mask_mode
1758 (GET_MODE_NUNITS (vecmode),
1759 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1760 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1761 {
1762 if (dump_enabled_p ())
1763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1764 "can't use a fully-masked loop because the target"
1765 " doesn't have the appropriate masked load or"
1766 " store.\n");
1767 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1768 return;
1769 }
1770 /* We might load more scalars than we need for permuting SLP loads.
1771 We checked in get_group_load_store_type that the extra elements
1772 don't leak into a new vector. */
1773 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1774 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1775 unsigned int nvectors;
1776 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1777 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1778 else
1779 gcc_unreachable ();
1780}
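
The number of masks recorded at the end of check_load_store_masking is group_size * vf divided by nunits, rounded away from zero; for the positive quantities involved this is an ordinary ceiling division. The standalone sketch below is illustrative only and uses constant stand-ins for the poly_uint64 values.

#include <stdio.h>

/* Same result as can_div_away_from_zero_p for positive constants.  */
static unsigned int
masks_needed (unsigned int group_size, unsigned int vf, unsigned int nunits)
{
  return (group_size * vf + nunits - 1) / nunits;
}

int
main (void)
{
  /* e.g. a group of 3 accesses, VF 8, 4 elements per vector -> 6 masks.  */
  printf ("%u\n", masks_needed (3, 8, 4));
  return 0;
}
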
1781
1782/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1783 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1784 that needs to be applied to all loads and stores in a vectorized loop.
1785 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1786
1787 MASK_TYPE is the type of both masks. If new statements are needed,
1788 insert them before GSI. */
1789
1790static tree
1791prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1792 gimple_stmt_iterator *gsi)
1793{
1794 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1795 if (!loop_mask)
1796 return vec_mask;
1797
1798 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1799 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1800 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1801 vec_mask, loop_mask);
1802 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1803 return and_res;
1804}
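
prepare_load_store_mask reduces to a lane-wise AND of the scalar condition mask and the loop mask. A minimal standalone sketch of that effect, using plain byte masks rather than vector booleans (illustrative only, not part of tree-vect-stmts.c):

#include <stdio.h>

#define LANES 8

int
main (void)
{
  unsigned char vec_mask[LANES]  = { 1, 1, 0, 1, 0, 1, 1, 1 };  /* condition */
  unsigned char loop_mask[LANES] = { 1, 1, 1, 1, 1, 0, 0, 0 };  /* tail ctrl */
  unsigned char and_res[LANES];

  for (int i = 0; i < LANES; i++)
    and_res[i] = vec_mask[i] & loop_mask[i];   /* the BIT_AND_EXPR above */

  for (int i = 0; i < LANES; i++)
    printf ("%d", and_res[i]);                 /* prints 11010000 */
  printf ("\n");
  return 0;
}
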
1805
62da9e14
RS
1806/* STMT is a non-strided load or store, meaning that it accesses
1807 elements with a known constant step. Return -1 if that step
1808 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1809
1810static int
1811compare_step_with_zero (gimple *stmt)
1812{
1813 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
1814 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1815 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1816 size_zero_node);
62da9e14
RS
1817}
1818
1819/* If the target supports a permute mask that reverses the elements in
1820 a vector of type VECTYPE, return that mask, otherwise return null. */
1821
1822static tree
1823perm_mask_for_reverse (tree vectype)
1824{
928686b1 1825 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 1826
d980067b
RS
1827 /* The encoding has a single stepped pattern. */
1828 vec_perm_builder sel (nunits, 1, 3);
928686b1 1829 for (int i = 0; i < 3; ++i)
908a1a16 1830 sel.quick_push (nunits - 1 - i);
62da9e14 1831
e3342de4
RS
1832 vec_perm_indices indices (sel, 1, nunits);
1833 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 1834 return NULL_TREE;
e3342de4 1835 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 1836}
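
The three indices pushed above are the stepped encoding of a full reversal permutation; expanded, the selector is simply { nunits-1, nunits-2, ..., 0 }. A standalone sketch printing that expansion (illustrative only; nunits chosen arbitrarily):

#include <stdio.h>

int
main (void)
{
  unsigned int nunits = 8;   /* arbitrary fixed vector length */

  for (unsigned int i = 0; i < nunits; i++)
    printf ("%u ", nunits - 1 - i);   /* prints: 7 6 5 4 3 2 1 0 */
  printf ("\n");
  return 0;
}
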
5ce9450f 1837
c3a8f964
RS
1838/* STMT is either a masked or unconditional store. Return the value
1839 being stored. */
1840
1841static tree
1842vect_get_store_rhs (gimple *stmt)
1843{
1844 if (gassign *assign = dyn_cast <gassign *> (stmt))
1845 {
1846 gcc_assert (gimple_assign_single_p (assign));
1847 return gimple_assign_rhs1 (assign);
1848 }
1849 if (gcall *call = dyn_cast <gcall *> (stmt))
1850 {
1851 internal_fn ifn = gimple_call_internal_fn (call);
1852 gcc_assert (ifn == IFN_MASK_STORE);
1853 return gimple_call_arg (stmt, 3);
1854 }
1855 gcc_unreachable ();
1856}
1857
2de001ee
RS
1858/* A subroutine of get_load_store_type, with a subset of the same
1859 arguments. Handle the case where STMT is part of a grouped load
1860 or store.
1861
1862 For stores, the statements in the group are all consecutive
1863 and there is no gap at the end. For loads, the statements in the
1864 group might not be consecutive; there can be gaps between statements
1865 as well as at the end. */
1866
1867static bool
1868get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 1869 bool masked_p, vec_load_store_type vls_type,
2de001ee
RS
1870 vect_memory_access_type *memory_access_type)
1871{
1872 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1873 vec_info *vinfo = stmt_info->vinfo;
1874 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1875 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1876 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 1877 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2de001ee
RS
1878 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1879 bool single_element_p = (stmt == first_stmt
1880 && !GROUP_NEXT_ELEMENT (stmt_info));
1881 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 1882 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
1883
1884 /* True if the vectorized statements would access beyond the last
1885 statement in the group. */
1886 bool overrun_p = false;
1887
1888 /* True if we can cope with such overrun by peeling for gaps, so that
1889 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
1890 bool can_overrun_p = (!masked_p
1891 && vls_type == VLS_LOAD
1892 && loop_vinfo
1893 && !loop->inner);
2de001ee
RS
1894
1895 /* There can only be a gap at the end of the group if the stride is
1896 known at compile time. */
1897 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1898
1899 /* Stores can't yet have gaps. */
1900 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1901
1902 if (slp)
1903 {
1904 if (STMT_VINFO_STRIDED_P (stmt_info))
1905 {
1906 /* Try to use consecutive accesses of GROUP_SIZE elements,
1907 separated by the stride, until we have a complete vector.
1908 Fall back to scalar accesses if that isn't possible. */
928686b1 1909 if (multiple_p (nunits, group_size))
2de001ee
RS
1910 *memory_access_type = VMAT_STRIDED_SLP;
1911 else
1912 *memory_access_type = VMAT_ELEMENTWISE;
1913 }
1914 else
1915 {
1916 overrun_p = loop_vinfo && gap != 0;
1917 if (overrun_p && vls_type != VLS_LOAD)
1918 {
1919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1920 "Grouped store with gaps requires"
1921 " non-consecutive accesses\n");
1922 return false;
1923 }
f702e7d4
RS
1924 /* An overrun is fine if the trailing elements are smaller
1925 than the alignment boundary B. Every vector access will
1926 be a multiple of B and so we are guaranteed to access a
1927 non-gap element in the same B-sized block. */
f9ef2c76 1928 if (overrun_p
f702e7d4
RS
1929 && gap < (vect_known_alignment_in_bytes (first_dr)
1930 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1931 overrun_p = false;
2de001ee
RS
1932 if (overrun_p && !can_overrun_p)
1933 {
1934 if (dump_enabled_p ())
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1936 "Peeling for outer loop is not supported\n");
1937 return false;
1938 }
1939 *memory_access_type = VMAT_CONTIGUOUS;
1940 }
1941 }
1942 else
1943 {
1944 /* We can always handle this case using elementwise accesses,
1945 but see if something more efficient is available. */
1946 *memory_access_type = VMAT_ELEMENTWISE;
1947
1948 /* If there is a gap at the end of the group then these optimizations
1949 would access excess elements in the last iteration. */
1950 bool would_overrun_p = (gap != 0);
f702e7d4
RS
1951 /* An overrun is fine if the trailing elements are smaller than the
1952 alignment boundary B. Every vector access will be a multiple of B
1953 and so we are guaranteed to access a non-gap element in the
1954 same B-sized block. */
f9ef2c76 1955 if (would_overrun_p
7e11fc7f 1956 && !masked_p
f702e7d4
RS
1957 && gap < (vect_known_alignment_in_bytes (first_dr)
1958 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 1959 would_overrun_p = false;
f702e7d4 1960
2de001ee 1961 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
1962 && (can_overrun_p || !would_overrun_p)
1963 && compare_step_with_zero (stmt) > 0)
2de001ee 1964 {
6737facb
RS
1965 /* First cope with the degenerate case of a single-element
1966 vector. */
1967 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
1968 *memory_access_type = VMAT_CONTIGUOUS;
1969
1970 /* Otherwise try using LOAD/STORE_LANES. */
1971 if (*memory_access_type == VMAT_ELEMENTWISE
1972 && (vls_type == VLS_LOAD
7e11fc7f
RS
1973 ? vect_load_lanes_supported (vectype, group_size, masked_p)
1974 : vect_store_lanes_supported (vectype, group_size,
1975 masked_p)))
2de001ee
RS
1976 {
1977 *memory_access_type = VMAT_LOAD_STORE_LANES;
1978 overrun_p = would_overrun_p;
1979 }
1980
1981 /* If that fails, try using permuting loads. */
1982 if (*memory_access_type == VMAT_ELEMENTWISE
1983 && (vls_type == VLS_LOAD
1984 ? vect_grouped_load_supported (vectype, single_element_p,
1985 group_size)
1986 : vect_grouped_store_supported (vectype, group_size)))
1987 {
1988 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1989 overrun_p = would_overrun_p;
1990 }
1991 }
1992 }
1993
1994 if (vls_type != VLS_LOAD && first_stmt == stmt)
1995 {
1996 /* STMT is the leader of the group. Check the operands of all the
1997 stmts of the group. */
1998 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1999 while (next_stmt)
2000 {
7e11fc7f 2001 tree op = vect_get_store_rhs (next_stmt);
2de001ee
RS
2002 gimple *def_stmt;
2003 enum vect_def_type dt;
2004 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2005 {
2006 if (dump_enabled_p ())
2007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2008 "use not simple.\n");
2009 return false;
2010 }
2011 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2012 }
2013 }
2014
2015 if (overrun_p)
2016 {
2017 gcc_assert (can_overrun_p);
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2020 "Data access with gaps requires scalar "
2021 "epilogue loop\n");
2022 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2023 }
2024
2025 return true;
2026}
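
The "overrun is fine" test used twice above compares the gap against the known alignment divided by the scalar element size: a trailing gap smaller than that ratio stays inside the final aligned block, so reading past the group cannot fault. A standalone sketch of the check (illustrative only, with made-up numbers):

#include <stdbool.h>
#include <stdio.h>

/* GAP trailing elements cannot fault if they fit inside the final
   ALIGN_BYTES-sized block, i.e. if GAP < ALIGN_BYTES / SCALAR_SIZE.  */
static bool
overrun_is_safe (unsigned int gap, unsigned int align_bytes,
		 unsigned int scalar_size)
{
  return gap < align_bytes / scalar_size;
}

int
main (void)
{
  /* 16-byte-aligned accesses to 4-byte elements: gaps of 1..3 are safe.  */
  printf ("%d %d\n", overrun_is_safe (3, 16, 4), overrun_is_safe (4, 16, 4));
  return 0;
}
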
2027
62da9e14
RS
2028/* A subroutine of get_load_store_type, with a subset of the same
2029 arguments. Handle the case where STMT is a load or store that
2030 accesses consecutive elements with a negative step. */
2031
2032static vect_memory_access_type
2033get_negative_load_store_type (gimple *stmt, tree vectype,
2034 vec_load_store_type vls_type,
2035 unsigned int ncopies)
2036{
2037 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2038 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2039 dr_alignment_support alignment_support_scheme;
2040
2041 if (ncopies > 1)
2042 {
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE;
2047 }
2048
2049 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2050 if (alignment_support_scheme != dr_aligned
2051 && alignment_support_scheme != dr_unaligned_supported)
2052 {
2053 if (dump_enabled_p ())
2054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2055 "negative step but alignment required.\n");
2056 return VMAT_ELEMENTWISE;
2057 }
2058
2059 if (vls_type == VLS_STORE_INVARIANT)
2060 {
2061 if (dump_enabled_p ())
2062 dump_printf_loc (MSG_NOTE, vect_location,
2063 "negative step with invariant source;"
2064 " no permute needed.\n");
2065 return VMAT_CONTIGUOUS_DOWN;
2066 }
2067
2068 if (!perm_mask_for_reverse (vectype))
2069 {
2070 if (dump_enabled_p ())
2071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2072 "negative step and reversing not supported.\n");
2073 return VMAT_ELEMENTWISE;
2074 }
2075
2076 return VMAT_CONTIGUOUS_REVERSE;
2077}
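
VMAT_CONTIGUOUS_REVERSE means each vector iteration performs a contiguous load that ends at the current (descending) index and then reverses the elements to recover scalar order. A standalone sketch of that access pattern (illustrative only; VF and the array contents are arbitrary):

#include <stdio.h>

#define VF 4

int
main (void)
{
  int a[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };

  /* The scalar loop reads a[7], a[6], ..., a[0] (step -1).  */
  for (int i = 8 - VF; i >= 0; i -= VF)
    {
      int v[VF];
      for (int k = 0; k < VF; k++)        /* contiguous load of a[i..i+VF-1] */
	v[k] = a[i + k];
      for (int k = VF - 1; k >= 0; k--)   /* reverse to recover scalar order */
	printf ("%d ", v[k]);
    }
  printf ("\n");                          /* prints: 7 6 5 4 3 2 1 0 */
  return 0;
}
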
2078
2de001ee
RS
2079/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2080 if there is a memory access type that the vectorized form can use,
2081 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2082 or scatters, fill in GS_INFO accordingly.
2083
2084 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2085 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2086 VECTYPE is the vector type that the vectorized statements will use.
2087 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2088
2089static bool
7e11fc7f 2090get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2091 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2092 vect_memory_access_type *memory_access_type,
2093 gather_scatter_info *gs_info)
2094{
2095 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2096 vec_info *vinfo = stmt_info->vinfo;
2097 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2098 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2099 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2100 {
2101 *memory_access_type = VMAT_GATHER_SCATTER;
2102 gimple *def_stmt;
2103 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2104 gcc_unreachable ();
2105 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2106 &gs_info->offset_dt,
2107 &gs_info->offset_vectype))
2108 {
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2111 "%s index use not simple.\n",
2112 vls_type == VLS_LOAD ? "gather" : "scatter");
2113 return false;
2114 }
2115 }
2116 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2117 {
7e11fc7f 2118 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2de001ee
RS
2119 memory_access_type))
2120 return false;
2121 }
2122 else if (STMT_VINFO_STRIDED_P (stmt_info))
2123 {
2124 gcc_assert (!slp);
2125 *memory_access_type = VMAT_ELEMENTWISE;
2126 }
2127 else
62da9e14
RS
2128 {
2129 int cmp = compare_step_with_zero (stmt);
2130 if (cmp < 0)
2131 *memory_access_type = get_negative_load_store_type
2132 (stmt, vectype, vls_type, ncopies);
2133 else if (cmp == 0)
2134 {
2135 gcc_assert (vls_type == VLS_LOAD);
2136 *memory_access_type = VMAT_INVARIANT;
2137 }
2138 else
2139 *memory_access_type = VMAT_CONTIGUOUS;
2140 }
2de001ee 2141
4d694b27
RS
2142 if ((*memory_access_type == VMAT_ELEMENTWISE
2143 || *memory_access_type == VMAT_STRIDED_SLP)
2144 && !nunits.is_constant ())
2145 {
2146 if (dump_enabled_p ())
2147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2148 "Not using elementwise accesses due to variable "
2149 "vectorization factor.\n");
2150 return false;
2151 }
2152
2de001ee
RS
2153 /* FIXME: At the moment the cost model seems to underestimate the
2154 cost of using elementwise accesses. This check preserves the
2155 traditional behavior until that can be fixed. */
2156 if (*memory_access_type == VMAT_ELEMENTWISE
2157 && !STMT_VINFO_STRIDED_P (stmt_info))
2158 {
2159 if (dump_enabled_p ())
2160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2161 "not falling back to elementwise accesses\n");
2162 return false;
2163 }
2164 return true;
2165}
2166
aaeefd88
RS
2167/* Return true if boolean argument MASK is suitable for vectorizing
2168 conditional load or store STMT. When returning true, store the
2169 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2170
2171static bool
2172vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
2173{
2174 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2175 {
2176 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 "mask argument is not a boolean.\n");
2179 return false;
2180 }
2181
2182 if (TREE_CODE (mask) != SSA_NAME)
2183 {
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2186 "mask argument is not an SSA name.\n");
2187 return false;
2188 }
2189
2190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2191 gimple *def_stmt;
2192 enum vect_def_type dt;
2193 tree mask_vectype;
2194 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
2195 &mask_vectype))
2196 {
2197 if (dump_enabled_p ())
2198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2199 "mask use not simple.\n");
2200 return false;
2201 }
2202
2203 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2204 if (!mask_vectype)
2205 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2206
2207 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2208 {
2209 if (dump_enabled_p ())
2210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2211 "could not find an appropriate vector mask type.\n");
2212 return false;
2213 }
2214
2215 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2216 TYPE_VECTOR_SUBPARTS (vectype)))
2217 {
2218 if (dump_enabled_p ())
2219 {
2220 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2221 "vector mask type ");
2222 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2223 dump_printf (MSG_MISSED_OPTIMIZATION,
2224 " does not match vector data type ");
2225 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2226 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2227 }
2228 return false;
2229 }
2230
2231 *mask_vectype_out = mask_vectype;
2232 return true;
2233}
2234
3133c3b6
RS
2235/* Return true if stored value RHS is suitable for vectorizing store
2236 statement STMT. When returning true, store the type of the
2237 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2238 store in *VLS_TYPE_OUT. */
2239
2240static bool
2241vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
2242 vec_load_store_type *vls_type_out)
2243{
 2244  /* In the case this is a store from a constant, make sure
 2245     native_encode_expr can handle it.  */
2246 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2247 {
2248 if (dump_enabled_p ())
2249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2250 "cannot encode constant as a byte sequence.\n");
2251 return false;
2252 }
2253
2254 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2255 gimple *def_stmt;
2256 enum vect_def_type dt;
2257 tree rhs_vectype;
2258 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
2259 &rhs_vectype))
2260 {
2261 if (dump_enabled_p ())
2262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2263 "use not simple.\n");
2264 return false;
2265 }
2266
2267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2268 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2269 {
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "incompatible vector types.\n");
2273 return false;
2274 }
2275
2276 *rhs_vectype_out = rhs_vectype;
2277 if (dt == vect_constant_def || dt == vect_external_def)
2278 *vls_type_out = VLS_STORE_INVARIANT;
2279 else
2280 *vls_type_out = VLS_STORE;
2281 return true;
2282}
2283
bc9587eb
RS
2284/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2285 Note that we support masks with floating-point type, in which case the
2286 floats are interpreted as a bitmask. */
2287
2288static tree
2289vect_build_all_ones_mask (gimple *stmt, tree masktype)
2290{
2291 if (TREE_CODE (masktype) == INTEGER_TYPE)
2292 return build_int_cst (masktype, -1);
2293 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2294 {
2295 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2296 mask = build_vector_from_val (masktype, mask);
2297 return vect_init_vector (stmt, mask, masktype, NULL);
2298 }
2299 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2300 {
2301 REAL_VALUE_TYPE r;
2302 long tmp[6];
2303 for (int j = 0; j < 6; ++j)
2304 tmp[j] = -1;
2305 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2306 tree mask = build_real (TREE_TYPE (masktype), r);
2307 mask = build_vector_from_val (masktype, mask);
2308 return vect_init_vector (stmt, mask, masktype, NULL);
2309 }
2310 gcc_unreachable ();
2311}
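
For float mask elements the code above builds a value whose underlying bits are all ones, to be interpreted as a bitmask rather than as a number. A standalone sketch of the same idea, using memcpy for the reinterpretation (illustrative only, not the GCC mechanism itself):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  uint32_t bits = UINT32_MAX;                  /* every bit set */
  float mask_elt;
  memcpy (&mask_elt, &bits, sizeof mask_elt);  /* reinterpret, don't convert */
  /* The all-ones pattern is a NaN when read as a float, which is fine:
     only the bits matter when the value is used as a mask element.  */
  printf ("bits 0x%08x, as float: %f\n", (unsigned int) bits, mask_elt);
  return 0;
}
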
2312
2313/* Build an all-zero merge value of type VECTYPE while vectorizing
2314 STMT as a gather load. */
2315
2316static tree
2317vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2318{
2319 tree merge;
2320 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2321 merge = build_int_cst (TREE_TYPE (vectype), 0);
2322 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2323 {
2324 REAL_VALUE_TYPE r;
2325 long tmp[6];
2326 for (int j = 0; j < 6; ++j)
2327 tmp[j] = 0;
2328 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2329 merge = build_real (TREE_TYPE (vectype), r);
2330 }
2331 else
2332 gcc_unreachable ();
2333 merge = build_vector_from_val (vectype, merge);
2334 return vect_init_vector (stmt, merge, vectype, NULL);
2335}
2336
c48d2d35
RS
2337/* Build a gather load call while vectorizing STMT. Insert new instructions
2338 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2339 operation. If the load is conditional, MASK is the unvectorized
2340 condition, otherwise MASK is null. */
2341
2342static void
2343vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2344 gimple **vec_stmt, gather_scatter_info *gs_info,
2345 tree mask)
2346{
2347 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2348 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2349 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2350 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2351 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2352 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2353 edge pe = loop_preheader_edge (loop);
2354 enum { NARROW, NONE, WIDEN } modifier;
2355 poly_uint64 gather_off_nunits
2356 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2357
2358 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2359 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2360 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2361 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2362 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2363 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2364 tree scaletype = TREE_VALUE (arglist);
2365 gcc_checking_assert (types_compatible_p (srctype, rettype)
2366 && (!mask || types_compatible_p (srctype, masktype)));
2367
2368 tree perm_mask = NULL_TREE;
2369 tree mask_perm_mask = NULL_TREE;
2370 if (known_eq (nunits, gather_off_nunits))
2371 modifier = NONE;
2372 else if (known_eq (nunits * 2, gather_off_nunits))
2373 {
2374 modifier = WIDEN;
2375
2376 /* Currently widening gathers and scatters are only supported for
2377 fixed-length vectors. */
2378 int count = gather_off_nunits.to_constant ();
2379 vec_perm_builder sel (count, count, 1);
2380 for (int i = 0; i < count; ++i)
2381 sel.quick_push (i | (count / 2));
2382
2383 vec_perm_indices indices (sel, 1, count);
2384 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2385 indices);
2386 }
2387 else if (known_eq (nunits, gather_off_nunits * 2))
2388 {
2389 modifier = NARROW;
2390
2391 /* Currently narrowing gathers and scatters are only supported for
2392 fixed-length vectors. */
2393 int count = nunits.to_constant ();
2394 vec_perm_builder sel (count, count, 1);
2395 sel.quick_grow (count);
2396 for (int i = 0; i < count; ++i)
2397 sel[i] = i < count / 2 ? i : i + count / 2;
2398 vec_perm_indices indices (sel, 2, count);
2399 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2400
2401 ncopies *= 2;
2402
2403 if (mask)
2404 {
2405 for (int i = 0; i < count; ++i)
2406 sel[i] = i | (count / 2);
2407 indices.new_vector (sel, 2, count);
2408 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2409 }
2410 }
2411 else
2412 gcc_unreachable ();
2413
2414 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2415 vectype);
2416
2417 tree ptr = fold_convert (ptrtype, gs_info->base);
2418 if (!is_gimple_min_invariant (ptr))
2419 {
2420 gimple_seq seq;
2421 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2422 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2423 gcc_assert (!new_bb);
2424 }
2425
2426 tree scale = build_int_cst (scaletype, gs_info->scale);
2427
2428 tree vec_oprnd0 = NULL_TREE;
2429 tree vec_mask = NULL_TREE;
2430 tree src_op = NULL_TREE;
2431 tree mask_op = NULL_TREE;
2432 tree prev_res = NULL_TREE;
2433 stmt_vec_info prev_stmt_info = NULL;
2434
2435 if (!mask)
2436 {
2437 src_op = vect_build_zero_merge_argument (stmt, rettype);
2438 mask_op = vect_build_all_ones_mask (stmt, masktype);
2439 }
2440
2441 for (int j = 0; j < ncopies; ++j)
2442 {
2443 tree op, var;
2444 gimple *new_stmt;
2445 if (modifier == WIDEN && (j & 1))
2446 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2447 perm_mask, stmt, gsi);
2448 else if (j == 0)
2449 op = vec_oprnd0
2450 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2451 else
2452 op = vec_oprnd0
2453 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2454
2455 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2456 {
2457 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2458 TYPE_VECTOR_SUBPARTS (idxtype)));
2459 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2460 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2461 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2463 op = var;
2464 }
2465
2466 if (mask)
2467 {
2468 if (mask_perm_mask && (j & 1))
2469 mask_op = permute_vec_elements (mask_op, mask_op,
2470 mask_perm_mask, stmt, gsi);
2471 else
2472 {
2473 if (j == 0)
2474 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2475 else
2476 {
2477 gimple *def_stmt;
2478 enum vect_def_type dt;
2479 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2480 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2481 }
2482
2483 mask_op = vec_mask;
2484 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2485 {
2486 gcc_assert
2487 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2488 TYPE_VECTOR_SUBPARTS (masktype)));
2489 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2490 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2491 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2492 mask_op);
2493 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2494 mask_op = var;
2495 }
2496 }
2497 src_op = mask_op;
2498 }
2499
2500 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2501 mask_op, scale);
2502
2503 if (!useless_type_conversion_p (vectype, rettype))
2504 {
2505 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2506 TYPE_VECTOR_SUBPARTS (rettype)));
2507 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2508 gimple_call_set_lhs (new_stmt, op);
2509 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2510 var = make_ssa_name (vec_dest);
2511 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2512 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2513 }
2514 else
2515 {
2516 var = make_ssa_name (vec_dest, new_stmt);
2517 gimple_call_set_lhs (new_stmt, var);
2518 }
2519
2520 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2521
2522 if (modifier == NARROW)
2523 {
2524 if ((j & 1) == 0)
2525 {
2526 prev_res = var;
2527 continue;
2528 }
2529 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2530 new_stmt = SSA_NAME_DEF_STMT (var);
2531 }
2532
2533 if (prev_stmt_info == NULL)
2534 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2535 else
2536 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2537 prev_stmt_info = vinfo_for_stmt (new_stmt);
2538 }
2539}
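
The two constant permutations built near the top of vect_build_gather_load_calls can be made concrete by printing them for a small fixed count. In the sketch below (illustrative only; count chosen arbitrarily) the WIDEN selector re-reads the high half of the offset vector for the odd copy, while the NARROW selector concatenates the low halves of two narrow results; indices >= count select from the second VEC_PERM_EXPR input.

#include <stdio.h>

int
main (void)
{
  unsigned int count = 8;

  printf ("widen selector:  ");
  for (unsigned int i = 0; i < count; i++)
    printf ("%u ", i | (count / 2));                    /* 4 5 6 7 4 5 6 7 */

  printf ("\nnarrow selector: ");
  for (unsigned int i = 0; i < count; i++)
    printf ("%u ", i < count / 2 ? i : i + count / 2);  /* 0 1 2 3 8 9 10 11 */
  printf ("\n");
  return 0;
}
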
2540
37b14185
RB
2541/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2542
2543static bool
2544vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2545 gimple **vec_stmt, slp_tree slp_node,
2546 tree vectype_in, enum vect_def_type *dt)
2547{
2548 tree op, vectype;
2549 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2550 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2551 unsigned ncopies;
2552 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2553
2554 op = gimple_call_arg (stmt, 0);
2555 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2556
2557 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2558 return false;
37b14185
RB
2559
2560 /* Multiple types in SLP are handled by creating the appropriate number of
2561 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2562 case of SLP. */
2563 if (slp_node)
2564 ncopies = 1;
2565 else
e8f142e2 2566 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2567
2568 gcc_assert (ncopies >= 1);
2569
2570 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2571 if (! char_vectype)
2572 return false;
2573
928686b1
RS
2574 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2575 return false;
2576
794e3180 2577 unsigned word_bytes = num_bytes / nunits;
908a1a16 2578
d980067b
RS
2579 /* The encoding uses one stepped pattern for each byte in the word. */
2580 vec_perm_builder elts (num_bytes, word_bytes, 3);
2581 for (unsigned i = 0; i < 3; ++i)
37b14185 2582 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2583 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2584
e3342de4
RS
2585 vec_perm_indices indices (elts, 1, num_bytes);
2586 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
2587 return false;
2588
2589 if (! vec_stmt)
2590 {
2591 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2592 if (dump_enabled_p ())
2593 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2594 "\n");
2595 if (! PURE_SLP_STMT (stmt_info))
2596 {
2597 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2598 1, vector_stmt, stmt_info, 0, vect_prologue);
2599 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2600 ncopies, vec_perm, stmt_info, 0, vect_body);
2601 }
2602 return true;
2603 }
2604
736d0f28 2605 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
2606
2607 /* Transform. */
2608 vec<tree> vec_oprnds = vNULL;
2609 gimple *new_stmt = NULL;
2610 stmt_vec_info prev_stmt_info = NULL;
2611 for (unsigned j = 0; j < ncopies; j++)
2612 {
2613 /* Handle uses. */
2614 if (j == 0)
306b0c92 2615 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
2616 else
2617 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2618
 2619	  /* Arguments are ready.  Create the new vector stmt.  */
2620 unsigned i;
2621 tree vop;
2622 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2623 {
2624 tree tem = make_ssa_name (char_vectype);
2625 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2626 char_vectype, vop));
2627 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2628 tree tem2 = make_ssa_name (char_vectype);
2629 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2630 tem, tem, bswap_vconst);
2631 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2632 tem = make_ssa_name (vectype);
2633 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2634 vectype, tem2));
2635 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2636 if (slp_node)
2637 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2638 }
2639
2640 if (slp_node)
2641 continue;
2642
2643 if (j == 0)
2644 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2645 else
2646 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2647
2648 prev_stmt_info = vinfo_for_stmt (new_stmt);
2649 }
2650
2651 vec_oprnds.release ();
2652 return true;
2653}
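
The byte permutation that implements bswap above reverses the bytes of each word while keeping the words in place; the builder only pushes three pattern steps because vec_perm_builder encodes the rest. The standalone sketch below (illustrative only) prints the fully expanded selector for a hypothetical 16-byte vector of 4-byte words.

#include <stdio.h>

int
main (void)
{
  unsigned int num_bytes = 16, word_bytes = 4;

  for (unsigned int w = 0; w < num_bytes / word_bytes; w++)
    for (unsigned int j = 0; j < word_bytes; j++)
      printf ("%u ", (w + 1) * word_bytes - j - 1);
  printf ("\n");   /* prints: 3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12 */
  return 0;
}
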
2654
b1b6836e
RS
2655/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2656 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2657 in a single step. On success, store the binary pack code in
2658 *CONVERT_CODE. */
2659
2660static bool
2661simple_integer_narrowing (tree vectype_out, tree vectype_in,
2662 tree_code *convert_code)
2663{
2664 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2665 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2666 return false;
2667
2668 tree_code code;
2669 int multi_step_cvt = 0;
2670 auto_vec <tree, 8> interm_types;
2671 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2672 &code, &multi_step_cvt,
2673 &interm_types)
2674 || multi_step_cvt)
2675 return false;
2676
2677 *convert_code = code;
2678 return true;
2679}
5ce9450f 2680
ebfd146a
IR
2681/* Function vectorizable_call.
2682
538dd0b7 2683 Check if GS performs a function call that can be vectorized.
b8698a0f 2684 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2685 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2686 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2687
2688static bool
355fe088 2689vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2690 slp_tree slp_node)
ebfd146a 2691{
538dd0b7 2692 gcall *stmt;
ebfd146a
IR
2693 tree vec_dest;
2694 tree scalar_dest;
2695 tree op, type;
2696 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2697 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 2698 tree vectype_out, vectype_in;
c7bda0f4
RS
2699 poly_uint64 nunits_in;
2700 poly_uint64 nunits_out;
ebfd146a 2701 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2702 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 2703 vec_info *vinfo = stmt_info->vinfo;
81c40241 2704 tree fndecl, new_temp, rhs_type;
355fe088 2705 gimple *def_stmt;
0502fb85
UB
2706 enum vect_def_type dt[3]
2707 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 2708 int ndts = 3;
355fe088 2709 gimple *new_stmt = NULL;
ebfd146a 2710 int ncopies, j;
6e1aa848 2711 vec<tree> vargs = vNULL;
ebfd146a
IR
2712 enum { NARROW, NONE, WIDEN } modifier;
2713 size_t i, nargs;
9d5e7640 2714 tree lhs;
ebfd146a 2715
190c2236 2716 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2717 return false;
2718
66c16fd9
RB
2719 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2720 && ! vec_stmt)
ebfd146a
IR
2721 return false;
2722
538dd0b7
DM
2723 /* Is GS a vectorizable call? */
2724 stmt = dyn_cast <gcall *> (gs);
2725 if (!stmt)
ebfd146a
IR
2726 return false;
2727
5ce9450f
JJ
2728 if (gimple_call_internal_p (stmt)
2729 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2730 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
c3a8f964
RS
2731 /* Handled by vectorizable_load and vectorizable_store. */
2732 return false;
5ce9450f 2733
0136f8f0
AH
2734 if (gimple_call_lhs (stmt) == NULL_TREE
2735 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
2736 return false;
2737
0136f8f0 2738 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 2739
b690cc0f
RG
2740 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2741
ebfd146a
IR
2742 /* Process function arguments. */
2743 rhs_type = NULL_TREE;
b690cc0f 2744 vectype_in = NULL_TREE;
ebfd146a
IR
2745 nargs = gimple_call_num_args (stmt);
2746
1b1562a5
MM
 2747  /* Bail out if the function has more than three arguments; we do not have
 2748     interesting builtin functions to vectorize with more than two arguments
 2749     except for fma.  A call with no arguments is not handled either.  */
2750 if (nargs == 0 || nargs > 3)
ebfd146a
IR
2751 return false;
2752
74bf76ed
JJ
 2753  /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
2754 if (gimple_call_internal_p (stmt)
2755 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2756 {
2757 nargs = 0;
2758 rhs_type = unsigned_type_node;
2759 }
2760
ebfd146a
IR
2761 for (i = 0; i < nargs; i++)
2762 {
b690cc0f
RG
2763 tree opvectype;
2764
ebfd146a
IR
2765 op = gimple_call_arg (stmt, i);
2766
2767 /* We can only handle calls with arguments of the same type. */
2768 if (rhs_type
8533c9d8 2769 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 2770 {
73fbfcad 2771 if (dump_enabled_p ())
78c60e3d 2772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2773 "argument types differ.\n");
ebfd146a
IR
2774 return false;
2775 }
b690cc0f
RG
2776 if (!rhs_type)
2777 rhs_type = TREE_TYPE (op);
ebfd146a 2778
81c40241 2779 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 2780 {
73fbfcad 2781 if (dump_enabled_p ())
78c60e3d 2782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2783 "use not simple.\n");
ebfd146a
IR
2784 return false;
2785 }
ebfd146a 2786
b690cc0f
RG
2787 if (!vectype_in)
2788 vectype_in = opvectype;
2789 else if (opvectype
2790 && opvectype != vectype_in)
2791 {
73fbfcad 2792 if (dump_enabled_p ())
78c60e3d 2793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2794 "argument vector types differ.\n");
b690cc0f
RG
2795 return false;
2796 }
2797 }
2798 /* If all arguments are external or constant defs use a vector type with
2799 the same size as the output vector type. */
ebfd146a 2800 if (!vectype_in)
b690cc0f 2801 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
2802 if (vec_stmt)
2803 gcc_assert (vectype_in);
2804 if (!vectype_in)
2805 {
73fbfcad 2806 if (dump_enabled_p ())
7d8930a0 2807 {
78c60e3d
SS
2808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2809 "no vectype for scalar type ");
2810 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 2811 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
2812 }
2813
2814 return false;
2815 }
ebfd146a
IR
2816
2817 /* FORNOW */
b690cc0f
RG
2818 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2819 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 2820 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 2821 modifier = NARROW;
c7bda0f4 2822 else if (known_eq (nunits_out, nunits_in))
ebfd146a 2823 modifier = NONE;
c7bda0f4 2824 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
2825 modifier = WIDEN;
2826 else
2827 return false;
2828
70439f0d
RS
2829 /* We only handle functions that do not read or clobber memory. */
2830 if (gimple_vuse (stmt))
2831 {
2832 if (dump_enabled_p ())
2833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2834 "function reads from or writes to memory.\n");
2835 return false;
2836 }
2837
ebfd146a
IR
2838 /* For now, we only vectorize functions if a target specific builtin
2839 is available. TODO -- in some cases, it might be profitable to
2840 insert the calls for pieces of the vector, in order to be able
2841 to vectorize other operations in the loop. */
70439f0d
RS
2842 fndecl = NULL_TREE;
2843 internal_fn ifn = IFN_LAST;
2844 combined_fn cfn = gimple_call_combined_fn (stmt);
2845 tree callee = gimple_call_fndecl (stmt);
2846
2847 /* First try using an internal function. */
b1b6836e
RS
2848 tree_code convert_code = ERROR_MARK;
2849 if (cfn != CFN_LAST
2850 && (modifier == NONE
2851 || (modifier == NARROW
2852 && simple_integer_narrowing (vectype_out, vectype_in,
2853 &convert_code))))
70439f0d
RS
2854 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2855 vectype_in);
2856
2857 /* If that fails, try asking for a target-specific built-in function. */
2858 if (ifn == IFN_LAST)
2859 {
2860 if (cfn != CFN_LAST)
2861 fndecl = targetm.vectorize.builtin_vectorized_function
2862 (cfn, vectype_out, vectype_in);
2863 else
2864 fndecl = targetm.vectorize.builtin_md_vectorized_function
2865 (callee, vectype_out, vectype_in);
2866 }
2867
2868 if (ifn == IFN_LAST && !fndecl)
ebfd146a 2869 {
70439f0d 2870 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
2871 && !slp_node
2872 && loop_vinfo
2873 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2874 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2875 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2876 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2877 {
2878 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2879 { 0, 1, 2, ... vf - 1 } vector. */
2880 gcc_assert (nargs == 0);
2881 }
37b14185
RB
2882 else if (modifier == NONE
2883 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2884 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2885 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2886 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2887 vectype_in, dt);
74bf76ed
JJ
2888 else
2889 {
2890 if (dump_enabled_p ())
2891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2892 "function is not vectorizable.\n");
74bf76ed
JJ
2893 return false;
2894 }
ebfd146a
IR
2895 }
2896
fce57248 2897 if (slp_node)
190c2236 2898 ncopies = 1;
b1b6836e 2899 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 2900 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 2901 else
e8f142e2 2902 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
2903
2904 /* Sanity check: make sure that at least one copy of the vectorized stmt
2905 needs to be generated. */
2906 gcc_assert (ncopies >= 1);
2907
2908 if (!vec_stmt) /* transformation not required. */
2909 {
2910 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 2911 if (dump_enabled_p ())
e645e942
TJ
2912 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2913 "\n");
4fc5ebf1 2914 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
b1b6836e
RS
2915 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2916 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2917 vec_promote_demote, stmt_info, 0, vect_body);
2918
ebfd146a
IR
2919 return true;
2920 }
2921
67b8dbac 2922 /* Transform. */
ebfd146a 2923
73fbfcad 2924 if (dump_enabled_p ())
e645e942 2925 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
2926
2927 /* Handle def. */
2928 scalar_dest = gimple_call_lhs (stmt);
2929 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2930
2931 prev_stmt_info = NULL;
b1b6836e 2932 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 2933 {
b1b6836e 2934 tree prev_res = NULL_TREE;
ebfd146a
IR
2935 for (j = 0; j < ncopies; ++j)
2936 {
2937 /* Build argument list for the vectorized call. */
2938 if (j == 0)
9771b263 2939 vargs.create (nargs);
ebfd146a 2940 else
9771b263 2941 vargs.truncate (0);
ebfd146a 2942
190c2236
JJ
2943 if (slp_node)
2944 {
ef062b13 2945 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 2946 vec<tree> vec_oprnds0;
190c2236
JJ
2947
2948 for (i = 0; i < nargs; i++)
9771b263 2949 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 2950 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 2951 vec_oprnds0 = vec_defs[0];
190c2236
JJ
2952
2953 /* Arguments are ready. Create the new vector stmt. */
9771b263 2954 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
2955 {
2956 size_t k;
2957 for (k = 0; k < nargs; k++)
2958 {
37b5ec8f 2959 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 2960 vargs[k] = vec_oprndsk[i];
190c2236 2961 }
b1b6836e
RS
2962 if (modifier == NARROW)
2963 {
2964 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
2965 gcall *call
2966 = gimple_build_call_internal_vec (ifn, vargs);
2967 gimple_call_set_lhs (call, half_res);
2968 gimple_call_set_nothrow (call, true);
2969 new_stmt = call;
b1b6836e
RS
2970 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2971 if ((i & 1) == 0)
2972 {
2973 prev_res = half_res;
2974 continue;
2975 }
2976 new_temp = make_ssa_name (vec_dest);
2977 new_stmt = gimple_build_assign (new_temp, convert_code,
2978 prev_res, half_res);
2979 }
70439f0d 2980 else
b1b6836e 2981 {
a844293d 2982 gcall *call;
b1b6836e 2983 if (ifn != IFN_LAST)
a844293d 2984 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 2985 else
a844293d
RS
2986 call = gimple_build_call_vec (fndecl, vargs);
2987 new_temp = make_ssa_name (vec_dest, call);
2988 gimple_call_set_lhs (call, new_temp);
2989 gimple_call_set_nothrow (call, true);
2990 new_stmt = call;
b1b6836e 2991 }
190c2236 2992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 2993 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
2994 }
2995
2996 for (i = 0; i < nargs; i++)
2997 {
37b5ec8f 2998 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 2999 vec_oprndsi.release ();
190c2236 3000 }
190c2236
JJ
3001 continue;
3002 }
3003
ebfd146a
IR
3004 for (i = 0; i < nargs; i++)
3005 {
3006 op = gimple_call_arg (stmt, i);
3007 if (j == 0)
3008 vec_oprnd0
81c40241 3009 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3010 else
63827fb8
IR
3011 {
3012 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3013 vec_oprnd0
3014 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3015 }
ebfd146a 3016
9771b263 3017 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
3018 }
3019
74bf76ed
JJ
3020 if (gimple_call_internal_p (stmt)
3021 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3022 {
c7bda0f4 3023 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3024 tree new_var
0e22bb5a 3025 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3026 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3027 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3028 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3029 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3030 }
b1b6836e
RS
3031 else if (modifier == NARROW)
3032 {
3033 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3034 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3035 gimple_call_set_lhs (call, half_res);
3036 gimple_call_set_nothrow (call, true);
3037 new_stmt = call;
b1b6836e
RS
3038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3039 if ((j & 1) == 0)
3040 {
3041 prev_res = half_res;
3042 continue;
3043 }
3044 new_temp = make_ssa_name (vec_dest);
3045 new_stmt = gimple_build_assign (new_temp, convert_code,
3046 prev_res, half_res);
3047 }
74bf76ed
JJ
3048 else
3049 {
a844293d 3050 gcall *call;
70439f0d 3051 if (ifn != IFN_LAST)
a844293d 3052 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3053 else
a844293d 3054 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3055 new_temp = make_ssa_name (vec_dest, new_stmt);
a844293d
RS
3056 gimple_call_set_lhs (call, new_temp);
3057 gimple_call_set_nothrow (call, true);
3058 new_stmt = call;
74bf76ed 3059 }
ebfd146a
IR
3060 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3061
b1b6836e 3062 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3063 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3064 else
3065 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3066
3067 prev_stmt_info = vinfo_for_stmt (new_stmt);
3068 }
b1b6836e
RS
3069 }
3070 else if (modifier == NARROW)
3071 {
ebfd146a
IR
3072 for (j = 0; j < ncopies; ++j)
3073 {
3074 /* Build argument list for the vectorized call. */
3075 if (j == 0)
9771b263 3076 vargs.create (nargs * 2);
ebfd146a 3077 else
9771b263 3078 vargs.truncate (0);
ebfd146a 3079
190c2236
JJ
3080 if (slp_node)
3081 {
ef062b13 3082 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3083 vec<tree> vec_oprnds0;
190c2236
JJ
3084
3085 for (i = 0; i < nargs; i++)
9771b263 3086 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3087 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3088 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3089
3090 /* Arguments are ready. Create the new vector stmt. */
9771b263 3091 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3092 {
3093 size_t k;
9771b263 3094 vargs.truncate (0);
190c2236
JJ
3095 for (k = 0; k < nargs; k++)
3096 {
37b5ec8f 3097 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3098 vargs.quick_push (vec_oprndsk[i]);
3099 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3100 }
a844293d 3101 gcall *call;
70439f0d 3102 if (ifn != IFN_LAST)
a844293d 3103 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3104 else
a844293d
RS
3105 call = gimple_build_call_vec (fndecl, vargs);
3106 new_temp = make_ssa_name (vec_dest, call);
3107 gimple_call_set_lhs (call, new_temp);
3108 gimple_call_set_nothrow (call, true);
3109 new_stmt = call;
190c2236 3110 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3111 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3112 }
3113
3114 for (i = 0; i < nargs; i++)
3115 {
37b5ec8f 3116 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3117 vec_oprndsi.release ();
190c2236 3118 }
190c2236
JJ
3119 continue;
3120 }
3121
ebfd146a
IR
3122 for (i = 0; i < nargs; i++)
3123 {
3124 op = gimple_call_arg (stmt, i);
3125 if (j == 0)
3126 {
3127 vec_oprnd0
81c40241 3128 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3129 vec_oprnd1
63827fb8 3130 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3131 }
3132 else
3133 {
336ecb65 3134 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3135 vec_oprnd0
63827fb8 3136 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3137 vec_oprnd1
63827fb8 3138 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3139 }
3140
9771b263
DN
3141 vargs.quick_push (vec_oprnd0);
3142 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3143 }
3144
b1b6836e 3145 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3146 new_temp = make_ssa_name (vec_dest, new_stmt);
3147 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3148 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3149
3150 if (j == 0)
3151 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3152 else
3153 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3154
3155 prev_stmt_info = vinfo_for_stmt (new_stmt);
3156 }
3157
3158 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3159 }
b1b6836e
RS
3160 else
3161 /* No current target implements this case. */
3162 return false;
ebfd146a 3163
9771b263 3164 vargs.release ();
ebfd146a 3165
ebfd146a
IR
3166 /* The call in STMT might prevent it from being removed in DCE.
3167 We cannot remove it here, however, because of the way the SSA name
3168 it defines is mapped to the new definition. So just replace the
3169 rhs of the statement with something harmless. */
3170
dd34c087
JJ
3171 if (slp_node)
3172 return true;
3173
ebfd146a 3174 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3175 if (is_pattern_stmt_p (stmt_info))
3176 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3177 else
3178 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3179
9d5e7640 3180 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3181 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3182 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3183 STMT_VINFO_STMT (stmt_info) = new_stmt;
3184 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3185
3186 return true;
3187}
3188
3189
0136f8f0
AH
3190struct simd_call_arg_info
3191{
3192 tree vectype;
3193 tree op;
0136f8f0 3194 HOST_WIDE_INT linear_step;
34e82342 3195 enum vect_def_type dt;
0136f8f0 3196 unsigned int align;
17b658af 3197 bool simd_lane_linear;
0136f8f0
AH
3198};
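/* Reading from the uses below: OP is the scalar argument (or, for linear
   arguments, the recorded base), VECTYPE its vector type where one exists,
   LINEAR_STEP the per-iteration step of a linear argument (0 otherwise),
   DT its vect_def_type, ALIGN the known pointer alignment in bytes, and
   SIMD_LANE_LINEAR whether the argument is linear only within a SIMD lane
   (see vect_simd_lane_linear below).  */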
3199
17b658af
JJ
3200/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3201 is linear within simd lane (but not within whole loop), note it in
3202 *ARGINFO. */
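/* A hypothetical GIMPLE shape (not taken from a testcase) that the walk
   below recognizes:

       _1 = GOMP_SIMD_LANE (simduid.0);
       _2 = (sizetype) _1;
       _3 = _2 * 4;
       op_4 = &base_array + _3;

   OP_4 is then linear within the SIMD lane with step 4 and base
   &base_array, even though it is not a simple induction variable of the
   enclosing loop.  */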
3203
3204static void
3205vect_simd_lane_linear (tree op, struct loop *loop,
3206 struct simd_call_arg_info *arginfo)
3207{
355fe088 3208 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3209
3210 if (!is_gimple_assign (def_stmt)
3211 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3212 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3213 return;
3214
3215 tree base = gimple_assign_rhs1 (def_stmt);
3216 HOST_WIDE_INT linear_step = 0;
3217 tree v = gimple_assign_rhs2 (def_stmt);
3218 while (TREE_CODE (v) == SSA_NAME)
3219 {
3220 tree t;
3221 def_stmt = SSA_NAME_DEF_STMT (v);
3222 if (is_gimple_assign (def_stmt))
3223 switch (gimple_assign_rhs_code (def_stmt))
3224 {
3225 case PLUS_EXPR:
3226 t = gimple_assign_rhs2 (def_stmt);
3227 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3228 return;
3229 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3230 v = gimple_assign_rhs1 (def_stmt);
3231 continue;
3232 case MULT_EXPR:
3233 t = gimple_assign_rhs2 (def_stmt);
3234 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3235 return;
3236 linear_step = tree_to_shwi (t);
3237 v = gimple_assign_rhs1 (def_stmt);
3238 continue;
3239 CASE_CONVERT:
3240 t = gimple_assign_rhs1 (def_stmt);
3241 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3242 || (TYPE_PRECISION (TREE_TYPE (v))
3243 < TYPE_PRECISION (TREE_TYPE (t))))
3244 return;
3245 if (!linear_step)
3246 linear_step = 1;
3247 v = t;
3248 continue;
3249 default:
3250 return;
3251 }
8e4284d0 3252 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3253 && loop->simduid
3254 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3255 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3256 == loop->simduid))
3257 {
3258 if (!linear_step)
3259 linear_step = 1;
3260 arginfo->linear_step = linear_step;
3261 arginfo->op = base;
3262 arginfo->simd_lane_linear = true;
3263 return;
3264 }
3265 }
3266}
3267
cf1b2ba4
RS
3268/* Return the number of elements in vector type VECTYPE, which is associated
3269 with a SIMD clone. At present these vectors always have a constant
3270 length. */
3271
3272static unsigned HOST_WIDE_INT
3273simd_clone_subparts (tree vectype)
3274{
928686b1 3275 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3276}
3277
0136f8f0
AH
3278/* Function vectorizable_simd_clone_call.
3279
3280 Check if STMT performs a function call that can be vectorized
3281 by calling a simd clone of the function.
3282 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3283 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3284 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
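/* A sketch of the situation handled here (hypothetical source, assuming the
   target provides SIMD clones for foo):

       #pragma omp declare simd
       float foo (float x);
       ...
       for (i = 0; i < n; i++)
         a[i] = foo (b[i]);

   The scalar call foo (b[i]) is replaced by a call to one of the SIMD
   clones registered for foo, e.g. a clone taking and returning a vector of
   floats, chosen by the badness heuristic below.  */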
3285
3286static bool
355fe088
TS
3287vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3288 gimple **vec_stmt, slp_tree slp_node)
0136f8f0
AH
3289{
3290 tree vec_dest;
3291 tree scalar_dest;
3292 tree op, type;
3293 tree vec_oprnd0 = NULL_TREE;
3294 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3295 tree vectype;
3296 unsigned int nunits;
3297 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3298 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3299 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3300 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3301 tree fndecl, new_temp;
355fe088
TS
3302 gimple *def_stmt;
3303 gimple *new_stmt = NULL;
0136f8f0 3304 int ncopies, j;
00426f9a 3305 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3306 vec<tree> vargs = vNULL;
3307 size_t i, nargs;
3308 tree lhs, rtype, ratype;
e7a74006 3309 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3310
3311 /* Is STMT a vectorizable call? */
3312 if (!is_gimple_call (stmt))
3313 return false;
3314
3315 fndecl = gimple_call_fndecl (stmt);
3316 if (fndecl == NULL_TREE)
3317 return false;
3318
d52f5295 3319 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3320 if (node == NULL || node->simd_clones == NULL)
3321 return false;
3322
3323 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3324 return false;
3325
66c16fd9
RB
3326 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3327 && ! vec_stmt)
0136f8f0
AH
3328 return false;
3329
3330 if (gimple_call_lhs (stmt)
3331 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3332 return false;
3333
3334 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3335
3336 vectype = STMT_VINFO_VECTYPE (stmt_info);
3337
3338 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3339 return false;
3340
3341 /* FORNOW */
fce57248 3342 if (slp_node)
0136f8f0
AH
3343 return false;
3344
3345 /* Process function arguments. */
3346 nargs = gimple_call_num_args (stmt);
3347
3348 /* Bail out if the function has zero arguments. */
3349 if (nargs == 0)
3350 return false;
3351
00426f9a 3352 arginfo.reserve (nargs, true);
0136f8f0
AH
3353
3354 for (i = 0; i < nargs; i++)
3355 {
3356 simd_call_arg_info thisarginfo;
3357 affine_iv iv;
3358
3359 thisarginfo.linear_step = 0;
3360 thisarginfo.align = 0;
3361 thisarginfo.op = NULL_TREE;
17b658af 3362 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3363
3364 op = gimple_call_arg (stmt, i);
81c40241
RB
3365 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3366 &thisarginfo.vectype)
0136f8f0
AH
3367 || thisarginfo.dt == vect_uninitialized_def)
3368 {
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3371 "use not simple.\n");
0136f8f0
AH
3372 return false;
3373 }
3374
3375 if (thisarginfo.dt == vect_constant_def
3376 || thisarginfo.dt == vect_external_def)
3377 gcc_assert (thisarginfo.vectype == NULL_TREE);
3378 else
3379 gcc_assert (thisarginfo.vectype != NULL_TREE);
3380
6c9e85fb
JJ
3381 /* For linear arguments, the analyze phase should have saved
3382 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3383 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3384 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3385 {
3386 gcc_assert (vec_stmt);
3387 thisarginfo.linear_step
17b658af 3388 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3389 thisarginfo.op
17b658af
JJ
3390 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3391 thisarginfo.simd_lane_linear
3392 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3393 == boolean_true_node);
6c9e85fb
JJ
3394 /* If loop has been peeled for alignment, we need to adjust it. */
3395 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3396 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3397 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3398 {
3399 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3400 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3401 tree opt = TREE_TYPE (thisarginfo.op);
3402 bias = fold_convert (TREE_TYPE (step), bias);
3403 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3404 thisarginfo.op
3405 = fold_build2 (POINTER_TYPE_P (opt)
3406 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3407 thisarginfo.op, bias);
3408 }
3409 }
3410 else if (!vec_stmt
3411 && thisarginfo.dt != vect_constant_def
3412 && thisarginfo.dt != vect_external_def
3413 && loop_vinfo
3414 && TREE_CODE (op) == SSA_NAME
3415 && simple_iv (loop, loop_containing_stmt (stmt), op,
3416 &iv, false)
3417 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3418 {
3419 thisarginfo.linear_step = tree_to_shwi (iv.step);
3420 thisarginfo.op = iv.base;
3421 }
3422 else if ((thisarginfo.dt == vect_constant_def
3423 || thisarginfo.dt == vect_external_def)
3424 && POINTER_TYPE_P (TREE_TYPE (op)))
3425 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3426 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3427 linear too. */
3428 if (POINTER_TYPE_P (TREE_TYPE (op))
3429 && !thisarginfo.linear_step
3430 && !vec_stmt
3431 && thisarginfo.dt != vect_constant_def
3432 && thisarginfo.dt != vect_external_def
3433 && loop_vinfo
3434 && !slp_node
3435 && TREE_CODE (op) == SSA_NAME)
3436 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3437
3438 arginfo.quick_push (thisarginfo);
3439 }
3440
d9f21f6a
RS
3441 unsigned HOST_WIDE_INT vf;
3442 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3443 {
3444 if (dump_enabled_p ())
3445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3446 "not considering SIMD clones; not yet supported"
3447 " for variable-width vectors.\n");
3448 return false;
3449 }
3450
0136f8f0
AH
3451 unsigned int badness = 0;
3452 struct cgraph_node *bestn = NULL;
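  /* Pick the clone to use.  If the analysis phase already recorded a choice
     in STMT_VINFO_SIMD_CLONE_INFO, reuse it; otherwise score every clone and
     keep the one with the smallest badness.  A simdlen smaller than the
     vectorization factor, an in-branch clone, target-specific penalties and
     mismatched argument kinds, linear steps or alignment all make a clone
     less attractive or rule it out entirely.  */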
6c9e85fb
JJ
3453 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3454 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3455 else
3456 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3457 n = n->simdclone->next_clone)
3458 {
3459 unsigned int this_badness = 0;
d9f21f6a 3460 if (n->simdclone->simdlen > vf
0136f8f0
AH
3461 || n->simdclone->nargs != nargs)
3462 continue;
d9f21f6a
RS
3463 if (n->simdclone->simdlen < vf)
3464 this_badness += (exact_log2 (vf)
0136f8f0
AH
3465 - exact_log2 (n->simdclone->simdlen)) * 1024;
3466 if (n->simdclone->inbranch)
3467 this_badness += 2048;
3468 int target_badness = targetm.simd_clone.usable (n);
3469 if (target_badness < 0)
3470 continue;
3471 this_badness += target_badness * 512;
3472 /* FORNOW: Have to add code to add the mask argument. */
3473 if (n->simdclone->inbranch)
3474 continue;
3475 for (i = 0; i < nargs; i++)
3476 {
3477 switch (n->simdclone->args[i].arg_type)
3478 {
3479 case SIMD_CLONE_ARG_TYPE_VECTOR:
3480 if (!useless_type_conversion_p
3481 (n->simdclone->args[i].orig_type,
3482 TREE_TYPE (gimple_call_arg (stmt, i))))
3483 i = -1;
3484 else if (arginfo[i].dt == vect_constant_def
3485 || arginfo[i].dt == vect_external_def
3486 || arginfo[i].linear_step)
3487 this_badness += 64;
3488 break;
3489 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3490 if (arginfo[i].dt != vect_constant_def
3491 && arginfo[i].dt != vect_external_def)
3492 i = -1;
3493 break;
3494 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3495 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3496 if (arginfo[i].dt == vect_constant_def
3497 || arginfo[i].dt == vect_external_def
3498 || (arginfo[i].linear_step
3499 != n->simdclone->args[i].linear_step))
3500 i = -1;
3501 break;
3502 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3503 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3504 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3505 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3506 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3507 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3508 /* FORNOW */
3509 i = -1;
3510 break;
3511 case SIMD_CLONE_ARG_TYPE_MASK:
3512 gcc_unreachable ();
3513 }
3514 if (i == (size_t) -1)
3515 break;
3516 if (n->simdclone->args[i].alignment > arginfo[i].align)
3517 {
3518 i = -1;
3519 break;
3520 }
3521 if (arginfo[i].align)
3522 this_badness += (exact_log2 (arginfo[i].align)
3523 - exact_log2 (n->simdclone->args[i].alignment));
3524 }
3525 if (i == (size_t) -1)
3526 continue;
3527 if (bestn == NULL || this_badness < badness)
3528 {
3529 bestn = n;
3530 badness = this_badness;
3531 }
3532 }
3533
3534 if (bestn == NULL)
00426f9a 3535 return false;
0136f8f0
AH
3536
3537 for (i = 0; i < nargs; i++)
3538 if ((arginfo[i].dt == vect_constant_def
3539 || arginfo[i].dt == vect_external_def)
3540 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3541 {
3542 arginfo[i].vectype
3543 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3544 i)));
3545 if (arginfo[i].vectype == NULL
cf1b2ba4 3546 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3547 > bestn->simdclone->simdlen))
00426f9a 3548 return false;
0136f8f0
AH
3549 }
3550
3551 fndecl = bestn->decl;
3552 nunits = bestn->simdclone->simdlen;
d9f21f6a 3553 ncopies = vf / nunits;
0136f8f0
AH
3554
3555 /* If the function isn't const, only allow it in simd loops where user
3556 has asserted that at least nunits consecutive iterations can be
3557 performed using SIMD instructions. */
3558 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3559 && gimple_vuse (stmt))
00426f9a 3560 return false;
0136f8f0
AH
3561
3562 /* Sanity check: make sure that at least one copy of the vectorized stmt
3563 needs to be generated. */
3564 gcc_assert (ncopies >= 1);
3565
3566 if (!vec_stmt) /* transformation not required. */
3567 {
6c9e85fb
JJ
3568 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3569 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3570 if ((bestn->simdclone->args[i].arg_type
3571 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3572 || (bestn->simdclone->args[i].arg_type
3573 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3574 {
17b658af 3575 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3576 + 1);
3577 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3578 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3579 ? size_type_node : TREE_TYPE (arginfo[i].op);
3580 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3581 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3582 tree sll = arginfo[i].simd_lane_linear
3583 ? boolean_true_node : boolean_false_node;
3584 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3585 }
0136f8f0
AH
3586 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3587 if (dump_enabled_p ())
3588 dump_printf_loc (MSG_NOTE, vect_location,
3589 "=== vectorizable_simd_clone_call ===\n");
3590/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3591 return true;
3592 }
3593
67b8dbac 3594 /* Transform. */
0136f8f0
AH
3595
3596 if (dump_enabled_p ())
3597 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3598
3599 /* Handle def. */
3600 scalar_dest = gimple_call_lhs (stmt);
3601 vec_dest = NULL_TREE;
3602 rtype = NULL_TREE;
3603 ratype = NULL_TREE;
3604 if (scalar_dest)
3605 {
3606 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3607 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3608 if (TREE_CODE (rtype) == ARRAY_TYPE)
3609 {
3610 ratype = rtype;
3611 rtype = TREE_TYPE (ratype);
3612 }
3613 }
3614
3615 prev_stmt_info = NULL;
3616 for (j = 0; j < ncopies; ++j)
3617 {
3618 /* Build argument list for the vectorized call. */
3619 if (j == 0)
3620 vargs.create (nargs);
3621 else
3622 vargs.truncate (0);
3623
3624 for (i = 0; i < nargs; i++)
3625 {
3626 unsigned int k, l, m, o;
3627 tree atype;
3628 op = gimple_call_arg (stmt, i);
3629 switch (bestn->simdclone->args[i].arg_type)
3630 {
3631 case SIMD_CLONE_ARG_TYPE_VECTOR:
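	      /* The clone's vector argument type need not match the type of
		 the vectorized operand: if it is narrower, BIT_FIELD_REFs
		 extract pieces of a wider operand vector; if it is wider,
		 several operand vectors are combined with a CONSTRUCTOR, so
		 each call receives exactly simdlen elements per argument.  */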
3632 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 3633 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
3634 for (m = j * o; m < (j + 1) * o; m++)
3635 {
cf1b2ba4
RS
3636 if (simd_clone_subparts (atype)
3637 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 3638 {
73a699ae 3639 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
3640 k = (simd_clone_subparts (arginfo[i].vectype)
3641 / simd_clone_subparts (atype));
0136f8f0
AH
3642 gcc_assert ((k & (k - 1)) == 0);
3643 if (m == 0)
3644 vec_oprnd0
81c40241 3645 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3646 else
3647 {
3648 vec_oprnd0 = arginfo[i].op;
3649 if ((m & (k - 1)) == 0)
3650 vec_oprnd0
3651 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3652 vec_oprnd0);
3653 }
3654 arginfo[i].op = vec_oprnd0;
3655 vec_oprnd0
3656 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 3657 bitsize_int (prec),
0136f8f0
AH
3658 bitsize_int ((m & (k - 1)) * prec));
3659 new_stmt
b731b390 3660 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3661 vec_oprnd0);
3662 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3663 vargs.safe_push (gimple_assign_lhs (new_stmt));
3664 }
3665 else
3666 {
cf1b2ba4
RS
3667 k = (simd_clone_subparts (atype)
3668 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
3669 gcc_assert ((k & (k - 1)) == 0);
3670 vec<constructor_elt, va_gc> *ctor_elts;
3671 if (k != 1)
3672 vec_alloc (ctor_elts, k);
3673 else
3674 ctor_elts = NULL;
3675 for (l = 0; l < k; l++)
3676 {
3677 if (m == 0 && l == 0)
3678 vec_oprnd0
81c40241 3679 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3680 else
3681 vec_oprnd0
3682 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3683 arginfo[i].op);
3684 arginfo[i].op = vec_oprnd0;
3685 if (k == 1)
3686 break;
3687 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3688 vec_oprnd0);
3689 }
3690 if (k == 1)
3691 vargs.safe_push (vec_oprnd0);
3692 else
3693 {
3694 vec_oprnd0 = build_constructor (atype, ctor_elts);
3695 new_stmt
b731b390 3696 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3697 vec_oprnd0);
3698 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3699 vargs.safe_push (gimple_assign_lhs (new_stmt));
3700 }
3701 }
3702 }
3703 break;
3704 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3705 vargs.safe_push (op);
3706 break;
3707 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3708 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
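	      /* A linear argument is passed as its value at the start of the
		 call.  Unless it is only linear within the SIMD lane (in
		 which case the recorded base is passed directly), the first
		 copy builds a fresh IV: a PHI in the loop header starting at
		 the base and bumped by linear_step * simdlen * ncopies per
		 iteration; later copies add j * simdlen * linear_step to the
		 PHI result.  */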
0136f8f0
AH
3709 if (j == 0)
3710 {
3711 gimple_seq stmts;
3712 arginfo[i].op
3713 = force_gimple_operand (arginfo[i].op, &stmts, true,
3714 NULL_TREE);
3715 if (stmts != NULL)
3716 {
3717 basic_block new_bb;
3718 edge pe = loop_preheader_edge (loop);
3719 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3720 gcc_assert (!new_bb);
3721 }
17b658af
JJ
3722 if (arginfo[i].simd_lane_linear)
3723 {
3724 vargs.safe_push (arginfo[i].op);
3725 break;
3726 }
b731b390 3727 tree phi_res = copy_ssa_name (op);
538dd0b7 3728 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3729 set_vinfo_for_stmt (new_phi,
310213d4 3730 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3731 add_phi_arg (new_phi, arginfo[i].op,
3732 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3733 enum tree_code code
3734 = POINTER_TYPE_P (TREE_TYPE (op))
3735 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3736 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3737 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3738 widest_int cst
3739 = wi::mul (bestn->simdclone->args[i].linear_step,
3740 ncopies * nunits);
3741 tree tcst = wide_int_to_tree (type, cst);
b731b390 3742 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
3743 new_stmt
3744 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
3745 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3746 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3747 set_vinfo_for_stmt (new_stmt,
310213d4 3748 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
3749 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3750 UNKNOWN_LOCATION);
3751 arginfo[i].op = phi_res;
3752 vargs.safe_push (phi_res);
3753 }
3754 else
3755 {
3756 enum tree_code code
3757 = POINTER_TYPE_P (TREE_TYPE (op))
3758 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3759 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3760 ? sizetype : TREE_TYPE (op);
807e902e
KZ
3761 widest_int cst
3762 = wi::mul (bestn->simdclone->args[i].linear_step,
3763 j * nunits);
3764 tree tcst = wide_int_to_tree (type, cst);
b731b390 3765 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
3766 new_stmt = gimple_build_assign (new_temp, code,
3767 arginfo[i].op, tcst);
0136f8f0
AH
3768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3769 vargs.safe_push (new_temp);
3770 }
3771 break;
7adb26f2
JJ
3772 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3773 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 3774 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
3775 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3776 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3777 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3778 default:
3779 gcc_unreachable ();
3780 }
3781 }
3782
3783 new_stmt = gimple_build_call_vec (fndecl, vargs);
3784 if (vec_dest)
3785 {
cf1b2ba4 3786 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 3787 if (ratype)
b731b390 3788 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
3789 else if (simd_clone_subparts (vectype)
3790 == simd_clone_subparts (rtype))
0136f8f0
AH
3791 new_temp = make_ssa_name (vec_dest, new_stmt);
3792 else
3793 new_temp = make_ssa_name (rtype, new_stmt);
3794 gimple_call_set_lhs (new_stmt, new_temp);
3795 }
3796 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3797
3798 if (vec_dest)
3799 {
cf1b2ba4 3800 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
3801 {
3802 unsigned int k, l;
73a699ae
RS
3803 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3804 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 3805 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
3806 gcc_assert ((k & (k - 1)) == 0);
3807 for (l = 0; l < k; l++)
3808 {
3809 tree t;
3810 if (ratype)
3811 {
3812 t = build_fold_addr_expr (new_temp);
3813 t = build2 (MEM_REF, vectype, t,
73a699ae 3814 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
3815 }
3816 else
3817 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 3818 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 3819 new_stmt
b731b390 3820 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
3821 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3822 if (j == 0 && l == 0)
3823 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3824 else
3825 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3826
3827 prev_stmt_info = vinfo_for_stmt (new_stmt);
3828 }
3829
3830 if (ratype)
3831 {
3832 tree clobber = build_constructor (ratype, NULL);
3833 TREE_THIS_VOLATILE (clobber) = 1;
3834 new_stmt = gimple_build_assign (new_temp, clobber);
3835 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3836 }
3837 continue;
3838 }
cf1b2ba4 3839 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 3840 {
cf1b2ba4
RS
3841 unsigned int k = (simd_clone_subparts (vectype)
3842 / simd_clone_subparts (rtype));
0136f8f0
AH
3843 gcc_assert ((k & (k - 1)) == 0);
3844 if ((j & (k - 1)) == 0)
3845 vec_alloc (ret_ctor_elts, k);
3846 if (ratype)
3847 {
cf1b2ba4 3848 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
3849 for (m = 0; m < o; m++)
3850 {
3851 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3852 size_int (m), NULL_TREE, NULL_TREE);
3853 new_stmt
b731b390 3854 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
3855 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3856 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3857 gimple_assign_lhs (new_stmt));
3858 }
3859 tree clobber = build_constructor (ratype, NULL);
3860 TREE_THIS_VOLATILE (clobber) = 1;
3861 new_stmt = gimple_build_assign (new_temp, clobber);
3862 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3863 }
3864 else
3865 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3866 if ((j & (k - 1)) != k - 1)
3867 continue;
3868 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3869 new_stmt
b731b390 3870 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
3871 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3872
3873 if ((unsigned) j == k - 1)
3874 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3875 else
3876 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3877
3878 prev_stmt_info = vinfo_for_stmt (new_stmt);
3879 continue;
3880 }
3881 else if (ratype)
3882 {
3883 tree t = build_fold_addr_expr (new_temp);
3884 t = build2 (MEM_REF, vectype, t,
3885 build_int_cst (TREE_TYPE (t), 0));
3886 new_stmt
b731b390 3887 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
3888 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3889 tree clobber = build_constructor (ratype, NULL);
3890 TREE_THIS_VOLATILE (clobber) = 1;
3891 vect_finish_stmt_generation (stmt,
3892 gimple_build_assign (new_temp,
3893 clobber), gsi);
3894 }
3895 }
3896
3897 if (j == 0)
3898 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3899 else
3900 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3901
3902 prev_stmt_info = vinfo_for_stmt (new_stmt);
3903 }
3904
3905 vargs.release ();
3906
3907 /* The call in STMT might prevent it from being removed in DCE.
3908 We cannot remove it here, however, because of the way the SSA name
3909 it defines is mapped to the new definition. So just replace the
3910 rhs of the statement with something harmless. */
3911
3912 if (slp_node)
3913 return true;
3914
3915 if (scalar_dest)
3916 {
3917 type = TREE_TYPE (scalar_dest);
3918 if (is_pattern_stmt_p (stmt_info))
3919 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3920 else
3921 lhs = gimple_call_lhs (stmt);
3922 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3923 }
3924 else
3925 new_stmt = gimple_build_nop ();
3926 set_vinfo_for_stmt (new_stmt, stmt_info);
3927 set_vinfo_for_stmt (stmt, NULL);
3928 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3929 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
3930 unlink_stmt_vdef (stmt);
3931
3932 return true;
3933}
3934
3935
ebfd146a
IR
3936/* Function vect_gen_widened_results_half
3937
3938 Create a vector stmt whose code, number of arguments, and result
b8698a0f 3939 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 3940 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
3941 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3942 needs to be created (DECL is a function-decl of a target-builtin).
3943 STMT is the original scalar stmt that we are vectorizing. */
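/* For instance, a WIDEN_MULT_EXPR on two V8HI operands is typically
   vectorized as a VEC_WIDEN_MULT_LO_EXPR plus a VEC_WIDEN_MULT_HI_EXPR,
   each yielding a V4SI; this helper emits one of those two halves.  */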
3944
355fe088 3945static gimple *
ebfd146a
IR
3946vect_gen_widened_results_half (enum tree_code code,
3947 tree decl,
3948 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3949 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 3950 gimple *stmt)
b8698a0f 3951{
355fe088 3952 gimple *new_stmt;
b8698a0f
L
3953 tree new_temp;
3954
3955 /* Generate half of the widened result: */
3956 if (code == CALL_EXPR)
3957 {
3958 /* Target specific support */
ebfd146a
IR
3959 if (op_type == binary_op)
3960 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3961 else
3962 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3963 new_temp = make_ssa_name (vec_dest, new_stmt);
3964 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
3965 }
3966 else
ebfd146a 3967 {
b8698a0f
L
3968 /* Generic support */
3969 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3970 if (op_type != binary_op)
3971 vec_oprnd1 = NULL;
0d0e4a03 3972 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
3973 new_temp = make_ssa_name (vec_dest, new_stmt);
3974 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3975 }
ebfd146a
IR
3976 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3977
ebfd146a
IR
3978 return new_stmt;
3979}
3980
4a00c761
JJ
3981
3982/* Get vectorized definitions for loop-based vectorization. For the first
3983 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3984 scalar operand), and for the rest we get a copy with
3985 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3986 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3987 The vectors are collected into VEC_OPRNDS. */
3988
3989static void
355fe088 3990vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 3991 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
3992{
3993 tree vec_oprnd;
3994
3995 /* Get first vector operand. */
3996 /* All the vector operands except the very first one (that is scalar oprnd)
3997 are stmt copies. */
3998 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 3999 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4000 else
4001 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4002
9771b263 4003 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4004
4005 /* Get second vector operand. */
4006 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4007 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4008
4009 *oprnd = vec_oprnd;
4010
4011 /* For conversion in multiple steps, continue to get operands
4012 recursively. */
4013 if (multi_step_cvt)
4014 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4015}
4016
4017
4018/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4019 For multi-step conversions store the resulting vectors and call the function
4020 recursively. */
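/* For example, with 128-bit vectors an int -> char conversion first packs
   four V4SI operands pairwise into two V8HI vectors and then packs those
   into a single V16QI, using VEC_PACK_TRUNC_EXPR at each step.  */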
4021
4022static void
9771b263 4023vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4024 int multi_step_cvt, gimple *stmt,
9771b263 4025 vec<tree> vec_dsts,
4a00c761
JJ
4026 gimple_stmt_iterator *gsi,
4027 slp_tree slp_node, enum tree_code code,
4028 stmt_vec_info *prev_stmt_info)
4029{
4030 unsigned int i;
4031 tree vop0, vop1, new_tmp, vec_dest;
355fe088 4032 gimple *new_stmt;
4a00c761
JJ
4033 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4034
9771b263 4035 vec_dest = vec_dsts.pop ();
4a00c761 4036
9771b263 4037 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4038 {
4039 /* Create demotion operation. */
9771b263
DN
4040 vop0 = (*vec_oprnds)[i];
4041 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4042 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4043 new_tmp = make_ssa_name (vec_dest, new_stmt);
4044 gimple_assign_set_lhs (new_stmt, new_tmp);
4045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4046
4047 if (multi_step_cvt)
4048 /* Store the resulting vector for next recursive call. */
9771b263 4049 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4050 else
4051 {
4052 /* This is the last step of the conversion sequence. Store the
4053 vectors in SLP_NODE or in vector info of the scalar statement
4054 (or in STMT_VINFO_RELATED_STMT chain). */
4055 if (slp_node)
9771b263 4056 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4057 else
c689ce1e
RB
4058 {
4059 if (!*prev_stmt_info)
4060 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4061 else
4062 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4063
c689ce1e
RB
4064 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4065 }
4a00c761
JJ
4066 }
4067 }
4068
4069 /* For multi-step demotion operations we first generate demotion operations
4070 from the source type to the intermediate types, and then combine the
4071 results (stored in VEC_OPRNDS) in demotion operation to the destination
4072 type. */
4073 if (multi_step_cvt)
4074 {
4075 /* At each level of recursion we have half of the operands we had at the
4076 previous level. */
9771b263 4077 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4078 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4079 stmt, vec_dsts, gsi, slp_node,
4080 VEC_PACK_TRUNC_EXPR,
4081 prev_stmt_info);
4082 }
4083
9771b263 4084 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4085}
4086
4087
4088/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4089 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4090 the resulting vectors and call the function recursively. */
4091
4092static void
9771b263
DN
4093vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4094 vec<tree> *vec_oprnds1,
355fe088 4095 gimple *stmt, tree vec_dest,
4a00c761
JJ
4096 gimple_stmt_iterator *gsi,
4097 enum tree_code code1,
4098 enum tree_code code2, tree decl1,
4099 tree decl2, int op_type)
4100{
4101 int i;
4102 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4103 gimple *new_stmt1, *new_stmt2;
6e1aa848 4104 vec<tree> vec_tmp = vNULL;
4a00c761 4105
9771b263
DN
4106 vec_tmp.create (vec_oprnds0->length () * 2);
4107 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4108 {
4109 if (op_type == binary_op)
9771b263 4110 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4111 else
4112 vop1 = NULL_TREE;
4113
4114 /* Generate the two halves of promotion operation. */
4115 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4116 op_type, vec_dest, gsi, stmt);
4117 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4118 op_type, vec_dest, gsi, stmt);
4119 if (is_gimple_call (new_stmt1))
4120 {
4121 new_tmp1 = gimple_call_lhs (new_stmt1);
4122 new_tmp2 = gimple_call_lhs (new_stmt2);
4123 }
4124 else
4125 {
4126 new_tmp1 = gimple_assign_lhs (new_stmt1);
4127 new_tmp2 = gimple_assign_lhs (new_stmt2);
4128 }
4129
4130 /* Store the results for the next step. */
9771b263
DN
4131 vec_tmp.quick_push (new_tmp1);
4132 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4133 }
4134
689eaba3 4135 vec_oprnds0->release ();
4a00c761
JJ
4136 *vec_oprnds0 = vec_tmp;
4137}
4138
4139
b8698a0f
L
4140/* Check if STMT performs a conversion operation that can be vectorized.
4141 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4142 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4143 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4144
4145static bool
355fe088
TS
4146vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4147 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4148{
4149 tree vec_dest;
4150 tree scalar_dest;
4a00c761 4151 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4152 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4154 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4155 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4156 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4157 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4158 tree new_temp;
355fe088 4159 gimple *def_stmt;
ebfd146a 4160 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4161 int ndts = 2;
355fe088 4162 gimple *new_stmt = NULL;
ebfd146a 4163 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4164 poly_uint64 nunits_in;
4165 poly_uint64 nunits_out;
ebfd146a 4166 tree vectype_out, vectype_in;
4a00c761
JJ
4167 int ncopies, i, j;
4168 tree lhs_type, rhs_type;
ebfd146a 4169 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4170 vec<tree> vec_oprnds0 = vNULL;
4171 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4172 tree vop0;
4a00c761 4173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4174 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4175 int multi_step_cvt = 0;
6e1aa848 4176 vec<tree> interm_types = vNULL;
4a00c761
JJ
4177 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4178 int op_type;
4a00c761 4179 unsigned short fltsz;
ebfd146a
IR
4180
4181 /* Is STMT a vectorizable conversion? */
4182
4a00c761 4183 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4184 return false;
4185
66c16fd9
RB
4186 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4187 && ! vec_stmt)
ebfd146a
IR
4188 return false;
4189
4190 if (!is_gimple_assign (stmt))
4191 return false;
4192
4193 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4194 return false;
4195
4196 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4197 if (!CONVERT_EXPR_CODE_P (code)
4198 && code != FIX_TRUNC_EXPR
4199 && code != FLOAT_EXPR
4200 && code != WIDEN_MULT_EXPR
4201 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4202 return false;
4203
4a00c761
JJ
4204 op_type = TREE_CODE_LENGTH (code);
4205
ebfd146a 4206 /* Check types of lhs and rhs. */
b690cc0f 4207 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4208 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4209 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4210
ebfd146a
IR
4211 op0 = gimple_assign_rhs1 (stmt);
4212 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4213
4214 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4215 && !((INTEGRAL_TYPE_P (lhs_type)
4216 && INTEGRAL_TYPE_P (rhs_type))
4217 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4218 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4219 return false;
4220
e6f5c25d
IE
4221 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4222 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4223 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4224 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4225 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4226 {
73fbfcad 4227 if (dump_enabled_p ())
78c60e3d 4228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4229 "type conversion to/from bit-precision unsupported."
4230 "\n");
4a00c761
JJ
4231 return false;
4232 }
4233
b690cc0f 4234 /* Check the operands of the operation. */
81c40241 4235 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4236 {
73fbfcad 4237 if (dump_enabled_p ())
78c60e3d 4238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4239 "use not simple.\n");
b690cc0f
RG
4240 return false;
4241 }
4a00c761
JJ
4242 if (op_type == binary_op)
4243 {
4244 bool ok;
4245
4246 op1 = gimple_assign_rhs2 (stmt);
4247 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4248 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4249 OP1. */
4250 if (CONSTANT_CLASS_P (op0))
81c40241 4251 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4252 else
81c40241 4253 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4254
4255 if (!ok)
4256 {
73fbfcad 4257 if (dump_enabled_p ())
78c60e3d 4258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4259 "use not simple.\n");
4a00c761
JJ
4260 return false;
4261 }
4262 }
4263
b690cc0f
RG
4264 /* If op0 is an external or constant defs use a vector type of
4265 the same size as the output vector type. */
ebfd146a 4266 if (!vectype_in)
b690cc0f 4267 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4268 if (vec_stmt)
4269 gcc_assert (vectype_in);
4270 if (!vectype_in)
4271 {
73fbfcad 4272 if (dump_enabled_p ())
4a00c761 4273 {
78c60e3d
SS
4274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4275 "no vectype for scalar type ");
4276 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4277 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4278 }
7d8930a0
IR
4279
4280 return false;
4281 }
ebfd146a 4282
e6f5c25d
IE
4283 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4284 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4285 {
4286 if (dump_enabled_p ())
4287 {
4288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4289 "can't convert between boolean and non "
4290 "boolean vectors");
4291 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4292 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4293 }
4294
4295 return false;
4296 }
4297
b690cc0f
RG
4298 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4299 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4300 if (known_eq (nunits_out, nunits_in))
ebfd146a 4301 modifier = NONE;
062d5ccc
RS
4302 else if (multiple_p (nunits_out, nunits_in))
4303 modifier = NARROW;
ebfd146a 4304 else
062d5ccc
RS
4305 {
4306 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4307 modifier = WIDEN;
4308 }
ebfd146a 4309
ff802fa1
IR
4310 /* Multiple types in SLP are handled by creating the appropriate number of
4311 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4312 case of SLP. */
fce57248 4313 if (slp_node)
ebfd146a 4314 ncopies = 1;
4a00c761 4315 else if (modifier == NARROW)
e8f142e2 4316 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4317 else
e8f142e2 4318 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4319
ebfd146a
IR
4320 /* Sanity check: make sure that at least one copy of the vectorized stmt
4321 needs to be generated. */
4322 gcc_assert (ncopies >= 1);
4323
16d22000
RS
4324 bool found_mode = false;
4325 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4326 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4327 opt_scalar_mode rhs_mode_iter;
b397965c 4328
ebfd146a 4329 /* Supportable by target? */
4a00c761 4330 switch (modifier)
ebfd146a 4331 {
4a00c761
JJ
4332 case NONE:
4333 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4334 return false;
4335 if (supportable_convert_operation (code, vectype_out, vectype_in,
4336 &decl1, &code1))
4337 break;
4338 /* FALLTHRU */
4339 unsupported:
73fbfcad 4340 if (dump_enabled_p ())
78c60e3d 4341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4342 "conversion not supported by target.\n");
ebfd146a 4343 return false;
ebfd146a 4344
4a00c761
JJ
4345 case WIDEN:
4346 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4347 &code1, &code2, &multi_step_cvt,
4348 &interm_types))
4a00c761
JJ
4349 {
4350 /* Binary widening operation can only be supported directly by the
4351 architecture. */
4352 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4353 break;
4354 }
4355
4356 if (code != FLOAT_EXPR
b397965c 4357 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4358 goto unsupported;
4359
b397965c 4360 fltsz = GET_MODE_SIZE (lhs_mode);
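	  /* E.g. for a short -> double conversion, keep widening the integer
	     side (short -> int -> long long) until a mode is reached from
	     which the FLOAT_EXPR itself, possibly combined with a final
	     widening step, is supported, recording any intermediate types
	     needed along the way.  */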
16d22000 4361 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4362 {
16d22000 4363 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4364 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4365 break;
4366
4a00c761
JJ
4367 cvt_type
4368 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4369 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4370 if (cvt_type == NULL_TREE)
4371 goto unsupported;
4372
4373 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4374 {
4375 if (!supportable_convert_operation (code, vectype_out,
4376 cvt_type, &decl1, &codecvt1))
4377 goto unsupported;
4378 }
4379 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4380 cvt_type, &codecvt1,
4381 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4382 &interm_types))
4383 continue;
4384 else
4385 gcc_assert (multi_step_cvt == 0);
4386
4387 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4388 vectype_in, &code1, &code2,
4389 &multi_step_cvt, &interm_types))
16d22000
RS
4390 {
4391 found_mode = true;
4392 break;
4393 }
4a00c761
JJ
4394 }
4395
16d22000 4396 if (!found_mode)
4a00c761
JJ
4397 goto unsupported;
4398
4399 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4400 codecvt2 = ERROR_MARK;
4401 else
4402 {
4403 multi_step_cvt++;
9771b263 4404 interm_types.safe_push (cvt_type);
4a00c761
JJ
4405 cvt_type = NULL_TREE;
4406 }
4407 break;
4408
4409 case NARROW:
4410 gcc_assert (op_type == unary_op);
4411 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4412 &code1, &multi_step_cvt,
4413 &interm_types))
4414 break;
4415
4416 if (code != FIX_TRUNC_EXPR
b397965c 4417 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4418 goto unsupported;
4419
4a00c761
JJ
4420 cvt_type
4421 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4422 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4423 if (cvt_type == NULL_TREE)
4424 goto unsupported;
4425 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4426 &decl1, &codecvt1))
4427 goto unsupported;
4428 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4429 &code1, &multi_step_cvt,
4430 &interm_types))
4431 break;
4432 goto unsupported;
4433
4434 default:
4435 gcc_unreachable ();
ebfd146a
IR
4436 }
4437
4438 if (!vec_stmt) /* transformation not required. */
4439 {
73fbfcad 4440 if (dump_enabled_p ())
78c60e3d 4441 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4442 "=== vectorizable_conversion ===\n");
4a00c761 4443 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4444 {
4445 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4fc5ebf1 4446 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
8bd37302 4447 }
4a00c761
JJ
4448 else if (modifier == NARROW)
4449 {
4450 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 4451 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
4452 }
4453 else
4454 {
4455 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 4456 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 4457 }
9771b263 4458 interm_types.release ();
ebfd146a
IR
4459 return true;
4460 }
4461
67b8dbac 4462 /* Transform. */
73fbfcad 4463 if (dump_enabled_p ())
78c60e3d 4464 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4465 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4466
4a00c761
JJ
4467 if (op_type == binary_op)
4468 {
4469 if (CONSTANT_CLASS_P (op0))
4470 op0 = fold_convert (TREE_TYPE (op1), op0);
4471 else if (CONSTANT_CLASS_P (op1))
4472 op1 = fold_convert (TREE_TYPE (op0), op1);
4473 }
4474
4475 /* In case of multi-step conversion, we first generate conversion operations
4476 to the intermediate types, and then from that types to the final one.
4477 We create vector destinations for the intermediate type (TYPES) received
4478 from supportable_*_operation, and store them in the correct order
4479 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4480 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4481 vec_dest = vect_create_destination_var (scalar_dest,
4482 (cvt_type && modifier == WIDEN)
4483 ? cvt_type : vectype_out);
9771b263 4484 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4485
4486 if (multi_step_cvt)
4487 {
9771b263
DN
4488 for (i = interm_types.length () - 1;
4489 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4490 {
4491 vec_dest = vect_create_destination_var (scalar_dest,
4492 intermediate_type);
9771b263 4493 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4494 }
4495 }
ebfd146a 4496
4a00c761 4497 if (cvt_type)
82294ec1
JJ
4498 vec_dest = vect_create_destination_var (scalar_dest,
4499 modifier == WIDEN
4500 ? vectype_out : cvt_type);
4a00c761
JJ
4501
4502 if (!slp_node)
4503 {
30862efc 4504 if (modifier == WIDEN)
4a00c761 4505 {
c3284718 4506 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4507 if (op_type == binary_op)
9771b263 4508 vec_oprnds1.create (1);
4a00c761 4509 }
30862efc 4510 else if (modifier == NARROW)
9771b263
DN
4511 vec_oprnds0.create (
4512 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4513 }
4514 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4515 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4516
4a00c761 4517 last_oprnd = op0;
ebfd146a
IR
4518 prev_stmt_info = NULL;
4519 switch (modifier)
4520 {
4521 case NONE:
4522 for (j = 0; j < ncopies; j++)
4523 {
ebfd146a 4524 if (j == 0)
306b0c92 4525 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4526 else
4527 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4528
9771b263 4529 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4530 {
4531 /* Arguments are ready, create the new vector stmt. */
4532 if (code1 == CALL_EXPR)
4533 {
4534 new_stmt = gimple_build_call (decl1, 1, vop0);
4535 new_temp = make_ssa_name (vec_dest, new_stmt);
4536 gimple_call_set_lhs (new_stmt, new_temp);
4537 }
4538 else
4539 {
4540 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4541 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4542 new_temp = make_ssa_name (vec_dest, new_stmt);
4543 gimple_assign_set_lhs (new_stmt, new_temp);
4544 }
4545
4546 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4547 if (slp_node)
9771b263 4548 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4549 else
4550 {
4551 if (!prev_stmt_info)
4552 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4553 else
4554 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4555 prev_stmt_info = vinfo_for_stmt (new_stmt);
4556 }
4a00c761 4557 }
ebfd146a
IR
4558 }
4559 break;
4560
4561 case WIDEN:
4562 /* In case the vectorization factor (VF) is bigger than the number
4563 of elements that we can fit in a vectype (nunits), we have to
4564 generate more than one vector stmt - i.e. - we need to "unroll"
4565 the vector stmt by a factor VF/nunits. */
4566 for (j = 0; j < ncopies; j++)
4567 {
4a00c761 4568 /* Handle uses. */
ebfd146a 4569 if (j == 0)
4a00c761
JJ
4570 {
4571 if (slp_node)
4572 {
4573 if (code == WIDEN_LSHIFT_EXPR)
4574 {
4575 unsigned int k;
ebfd146a 4576
4a00c761
JJ
4577 vec_oprnd1 = op1;
4578 /* Store vec_oprnd1 for every vector stmt to be created
4579 for SLP_NODE. We check during the analysis that all
4580 the shift arguments are the same. */
4581 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4582 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4583
4584 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4585 slp_node);
4a00c761
JJ
4586 }
4587 else
4588 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4589 &vec_oprnds1, slp_node);
4a00c761
JJ
4590 }
4591 else
4592 {
81c40241 4593 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4594 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4595 if (op_type == binary_op)
4596 {
4597 if (code == WIDEN_LSHIFT_EXPR)
4598 vec_oprnd1 = op1;
4599 else
81c40241 4600 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4601 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4602 }
4603 }
4604 }
ebfd146a 4605 else
4a00c761
JJ
4606 {
4607 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
4608 vec_oprnds0.truncate (0);
4609 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4610 if (op_type == binary_op)
4611 {
4612 if (code == WIDEN_LSHIFT_EXPR)
4613 vec_oprnd1 = op1;
4614 else
4615 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4616 vec_oprnd1);
9771b263
DN
4617 vec_oprnds1.truncate (0);
4618 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4619 }
4620 }
ebfd146a 4621
4a00c761
JJ
4622 /* Arguments are ready. Create the new vector stmts. */
4623 for (i = multi_step_cvt; i >= 0; i--)
4624 {
9771b263 4625 tree this_dest = vec_dsts[i];
4a00c761
JJ
4626 enum tree_code c1 = code1, c2 = code2;
4627 if (i == 0 && codecvt2 != ERROR_MARK)
4628 {
4629 c1 = codecvt1;
4630 c2 = codecvt2;
4631 }
4632 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4633 &vec_oprnds1,
4634 stmt, this_dest, gsi,
4635 c1, c2, decl1, decl2,
4636 op_type);
4637 }
4638
9771b263 4639 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4640 {
4641 if (cvt_type)
4642 {
4643 if (codecvt1 == CALL_EXPR)
4644 {
4645 new_stmt = gimple_build_call (decl1, 1, vop0);
4646 new_temp = make_ssa_name (vec_dest, new_stmt);
4647 gimple_call_set_lhs (new_stmt, new_temp);
4648 }
4649 else
4650 {
4651 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4652 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4653 new_stmt = gimple_build_assign (new_temp, codecvt1,
4654 vop0);
4a00c761
JJ
4655 }
4656
4657 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4658 }
4659 else
4660 new_stmt = SSA_NAME_DEF_STMT (vop0);
4661
4662 if (slp_node)
9771b263 4663 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4664 else
c689ce1e
RB
4665 {
4666 if (!prev_stmt_info)
4667 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4668 else
4669 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4670 prev_stmt_info = vinfo_for_stmt (new_stmt);
4671 }
4a00c761 4672 }
ebfd146a 4673 }
4a00c761
JJ
4674
4675 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4676 break;
4677
4678 case NARROW:
4679 /* In case the vectorization factor (VF) is bigger than the number
4680 of elements that we can fit in a vectype (nunits), we have to
4681 generate more than one vector stmt - i.e - we need to "unroll"
4682 the vector stmt by a factor VF/nunits. */
4683 for (j = 0; j < ncopies; j++)
4684 {
4685 /* Handle uses. */
4a00c761
JJ
4686 if (slp_node)
4687 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4688 slp_node);
ebfd146a
IR
4689 else
4690 {
9771b263 4691 vec_oprnds0.truncate (0);
4a00c761
JJ
4692 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4693 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
4694 }
4695
4a00c761
JJ
4696 /* Arguments are ready. Create the new vector stmts. */
4697 if (cvt_type)
9771b263 4698 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4699 {
4700 if (codecvt1 == CALL_EXPR)
4701 {
4702 new_stmt = gimple_build_call (decl1, 1, vop0);
4703 new_temp = make_ssa_name (vec_dest, new_stmt);
4704 gimple_call_set_lhs (new_stmt, new_temp);
4705 }
4706 else
4707 {
4708 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4709 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4710 new_stmt = gimple_build_assign (new_temp, codecvt1,
4711 vop0);
4a00c761 4712 }
ebfd146a 4713
4a00c761 4714 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4715 vec_oprnds0[i] = new_temp;
4a00c761 4716 }
ebfd146a 4717
4a00c761
JJ
4718 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4719 stmt, vec_dsts, gsi,
4720 slp_node, code1,
4721 &prev_stmt_info);
ebfd146a
IR
4722 }
4723
4724 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4725 break;
ebfd146a
IR
4726 }
4727
9771b263
DN
4728 vec_oprnds0.release ();
4729 vec_oprnds1.release ();
9771b263 4730 interm_types.release ();
ebfd146a
IR
4731
4732 return true;
4733}
ff802fa1
IR
4734
4735
ebfd146a
IR
4736/* Function vectorizable_assignment.
4737
b8698a0f
L
4738 Check if STMT performs an assignment (copy) that can be vectorized.
4739 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4740 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4741 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4742
4743static bool
355fe088
TS
4744vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4745 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4746{
4747 tree vec_dest;
4748 tree scalar_dest;
4749 tree op;
4750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
4751 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4752 tree new_temp;
355fe088 4753 gimple *def_stmt;
4fc5ebf1
JG
4754 enum vect_def_type dt[1] = {vect_unknown_def_type};
4755 int ndts = 1;
ebfd146a 4756 int ncopies;
f18b55bd 4757 int i, j;
6e1aa848 4758 vec<tree> vec_oprnds = vNULL;
ebfd146a 4759 tree vop;
a70d6342 4760 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4761 vec_info *vinfo = stmt_info->vinfo;
355fe088 4762 gimple *new_stmt = NULL;
f18b55bd 4763 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
4764 enum tree_code code;
4765 tree vectype_in;
ebfd146a 4766
a70d6342 4767 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4768 return false;
4769
66c16fd9
RB
4770 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4771 && ! vec_stmt)
ebfd146a
IR
4772 return false;
4773
4774 /* Is vectorizable assignment? */
4775 if (!is_gimple_assign (stmt))
4776 return false;
4777
4778 scalar_dest = gimple_assign_lhs (stmt);
4779 if (TREE_CODE (scalar_dest) != SSA_NAME)
4780 return false;
4781
fde9c428 4782 code = gimple_assign_rhs_code (stmt);
ebfd146a 4783 if (gimple_assign_single_p (stmt)
fde9c428
RG
4784 || code == PAREN_EXPR
4785 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
4786 op = gimple_assign_rhs1 (stmt);
4787 else
4788 return false;
4789
7b7ec6c5
RG
4790 if (code == VIEW_CONVERT_EXPR)
4791 op = TREE_OPERAND (op, 0);
4792
465c8c19 4793 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 4794 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
4795
4796 /* Multiple types in SLP are handled by creating the appropriate number of
4797 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4798 case of SLP. */
fce57248 4799 if (slp_node)
465c8c19
JJ
4800 ncopies = 1;
4801 else
e8f142e2 4802 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
4803
4804 gcc_assert (ncopies >= 1);
4805
81c40241 4806 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 4807 {
73fbfcad 4808 if (dump_enabled_p ())
78c60e3d 4809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4810 "use not simple.\n");
ebfd146a
IR
4811 return false;
4812 }
4813
fde9c428
RG
4814 /* We can handle NOP_EXPR conversions that do not change the number
4815 of elements or the vector size. */
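 /* For example, a conversion between "unsigned int" and "int" on V4SI
 operands keeps both the number of elements (4) and the vector size
 (16 bytes), so it can be treated as a simple copy here (purely an
 illustrative case, not tied to any particular target). */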
7b7ec6c5
RG
4816 if ((CONVERT_EXPR_CODE_P (code)
4817 || code == VIEW_CONVERT_EXPR)
fde9c428 4818 && (!vectype_in
928686b1 4819 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
4820 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
4821 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
4822 return false;
4823
7b7b1813
RG
4824 /* We do not handle bit-precision changes. */
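 /* Illustrative examples only: converting to a 3-bit bit-field type would
 need an extra truncation and is rejected below, whereas widening an
 unsigned value merely adds zero bits, leaves the bit-pattern unchanged
 and is therefore allowed. */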
4825 if ((CONVERT_EXPR_CODE_P (code)
4826 || code == VIEW_CONVERT_EXPR)
4827 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
4828 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4829 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
4830 /* But a conversion that does not change the bit-pattern is ok. */
4831 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4832 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
4833 && TYPE_UNSIGNED (TREE_TYPE (op)))
4834 /* Conversion between boolean types of different sizes is
4835 a simple assignment if their vectypes are the same
4836 boolean vectors. */
4837 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4838 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4839 {
73fbfcad 4840 if (dump_enabled_p ())
78c60e3d
SS
4841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4842 "type conversion to/from bit-precision "
e645e942 4843 "unsupported.\n");
7b7b1813
RG
4844 return false;
4845 }
4846
ebfd146a
IR
4847 if (!vec_stmt) /* transformation not required. */
4848 {
4849 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4850 if (dump_enabled_p ())
78c60e3d 4851 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4852 "=== vectorizable_assignment ===\n");
4fc5ebf1 4853 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
4854 return true;
4855 }
4856
67b8dbac 4857 /* Transform. */
73fbfcad 4858 if (dump_enabled_p ())
e645e942 4859 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
4860
4861 /* Handle def. */
4862 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4863
4864 /* Handle use. */
f18b55bd 4865 for (j = 0; j < ncopies; j++)
ebfd146a 4866 {
f18b55bd
IR
4867 /* Handle uses. */
4868 if (j == 0)
306b0c92 4869 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
4870 else
4871 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4872
4873 /* Arguments are ready. Create the new vector stmt. */
9771b263 4874 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4875 {
7b7ec6c5
RG
4876 if (CONVERT_EXPR_CODE_P (code)
4877 || code == VIEW_CONVERT_EXPR)
4a73490d 4878 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
4879 new_stmt = gimple_build_assign (vec_dest, vop);
4880 new_temp = make_ssa_name (vec_dest, new_stmt);
4881 gimple_assign_set_lhs (new_stmt, new_temp);
4882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4883 if (slp_node)
9771b263 4884 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4885 }
ebfd146a
IR
4886
4887 if (slp_node)
f18b55bd
IR
4888 continue;
4889
4890 if (j == 0)
4891 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4892 else
4893 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4894
4895 prev_stmt_info = vinfo_for_stmt (new_stmt);
4896 }
b8698a0f 4897
9771b263 4898 vec_oprnds.release ();
ebfd146a
IR
4899 return true;
4900}
4901
9dc3f7de 4902
1107f3ae
IR
4903/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4904 either as shift by a scalar or by a vector. */
4905
4906bool
4907vect_supportable_shift (enum tree_code code, tree scalar_type)
4908{
4909
ef4bddc2 4910 machine_mode vec_mode;
1107f3ae
IR
4911 optab optab;
4912 int icode;
4913 tree vectype;
4914
4915 vectype = get_vectype_for_scalar_type (scalar_type);
4916 if (!vectype)
4917 return false;
4918
4919 optab = optab_for_tree_code (code, vectype, optab_scalar);
4920 if (!optab
4921 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4922 {
4923 optab = optab_for_tree_code (code, vectype, optab_vector);
4924 if (!optab
4925 || (optab_handler (optab, TYPE_MODE (vectype))
4926 == CODE_FOR_nothing))
4927 return false;
4928 }
4929
4930 vec_mode = TYPE_MODE (vectype);
4931 icode = (int) optab_handler (optab, vec_mode);
4932 if (icode == CODE_FOR_nothing)
4933 return false;
4934
4935 return true;
4936}
4937
4938
9dc3f7de
IR
4939/* Function vectorizable_shift.
4940
4941 Check if STMT performs a shift operation that can be vectorized.
4942 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4943 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4944 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4945
4946static bool
355fe088
TS
4947vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4948 gimple **vec_stmt, slp_tree slp_node)
9dc3f7de
IR
4949{
4950 tree vec_dest;
4951 tree scalar_dest;
4952 tree op0, op1 = NULL;
4953 tree vec_oprnd1 = NULL_TREE;
4954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4955 tree vectype;
4956 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4957 enum tree_code code;
ef4bddc2 4958 machine_mode vec_mode;
9dc3f7de
IR
4959 tree new_temp;
4960 optab optab;
4961 int icode;
ef4bddc2 4962 machine_mode optab_op2_mode;
355fe088 4963 gimple *def_stmt;
9dc3f7de 4964 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4965 int ndts = 2;
355fe088 4966 gimple *new_stmt = NULL;
9dc3f7de 4967 stmt_vec_info prev_stmt_info;
928686b1
RS
4968 poly_uint64 nunits_in;
4969 poly_uint64 nunits_out;
9dc3f7de 4970 tree vectype_out;
cede2577 4971 tree op1_vectype;
9dc3f7de
IR
4972 int ncopies;
4973 int j, i;
6e1aa848
DN
4974 vec<tree> vec_oprnds0 = vNULL;
4975 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
4976 tree vop0, vop1;
4977 unsigned int k;
49eab32e 4978 bool scalar_shift_arg = true;
9dc3f7de 4979 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4980 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
4981
4982 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4983 return false;
4984
66c16fd9
RB
4985 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4986 && ! vec_stmt)
9dc3f7de
IR
4987 return false;
4988
4989 /* Is STMT a vectorizable binary/unary operation? */
4990 if (!is_gimple_assign (stmt))
4991 return false;
4992
4993 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4994 return false;
4995
4996 code = gimple_assign_rhs_code (stmt);
4997
4998 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4999 || code == RROTATE_EXPR))
5000 return false;
5001
5002 scalar_dest = gimple_assign_lhs (stmt);
5003 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5004 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5005 {
73fbfcad 5006 if (dump_enabled_p ())
78c60e3d 5007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5008 "bit-precision shifts not supported.\n");
7b7b1813
RG
5009 return false;
5010 }
9dc3f7de
IR
5011
5012 op0 = gimple_assign_rhs1 (stmt);
81c40241 5013 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 5014 {
73fbfcad 5015 if (dump_enabled_p ())
78c60e3d 5016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5017 "use not simple.\n");
9dc3f7de
IR
5018 return false;
5019 }
5020 /* If op0 is an external or constant def use a vector type with
5021 the same size as the output vector type. */
5022 if (!vectype)
5023 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5024 if (vec_stmt)
5025 gcc_assert (vectype);
5026 if (!vectype)
5027 {
73fbfcad 5028 if (dump_enabled_p ())
78c60e3d 5029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5030 "no vectype for scalar type\n");
9dc3f7de
IR
5031 return false;
5032 }
5033
5034 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5035 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5036 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5037 return false;
5038
5039 op1 = gimple_assign_rhs2 (stmt);
81c40241 5040 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 5041 {
73fbfcad 5042 if (dump_enabled_p ())
78c60e3d 5043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5044 "use not simple.\n");
9dc3f7de
IR
5045 return false;
5046 }
5047
9dc3f7de
IR
5048 /* Multiple types in SLP are handled by creating the appropriate number of
5049 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5050 case of SLP. */
fce57248 5051 if (slp_node)
9dc3f7de
IR
5052 ncopies = 1;
5053 else
e8f142e2 5054 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5055
5056 gcc_assert (ncopies >= 1);
5057
5058 /* Determine whether the shift amount is a vector, or scalar. If the
5059 shift/rotate amount is a vector, use the vector/vector shift optabs. */
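 /* For instance, "a[i] = b[i] << 3" or a shift by a loop-invariant scalar
 can use the vector-shifted-by-scalar optab, whereas "a[i] = b[i] << c[i]"
 needs the vector-shifted-by-vector optab (an illustrative sketch of the
 distinction made below). */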
5060
dbfa87aa
YR
5061 if ((dt[1] == vect_internal_def
5062 || dt[1] == vect_induction_def)
5063 && !slp_node)
49eab32e
JJ
5064 scalar_shift_arg = false;
5065 else if (dt[1] == vect_constant_def
5066 || dt[1] == vect_external_def
5067 || dt[1] == vect_internal_def)
5068 {
5069 /* In SLP, we need to check whether the shift count is the same
5070 for all statements; in loops, if it is a constant or invariant,
5071 it is always a scalar shift. */
5072 if (slp_node)
5073 {
355fe088
TS
5074 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5075 gimple *slpstmt;
49eab32e 5076
9771b263 5077 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5078 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5079 scalar_shift_arg = false;
5080 }
60d393e8
RB
5081
5082 /* If the shift amount is computed by a pattern stmt, we cannot
5083 use the scalar amount directly, so give up and use a vector
5084 shift. */
5085 if (dt[1] == vect_internal_def)
5086 {
5087 gimple *def = SSA_NAME_DEF_STMT (op1);
5088 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5089 scalar_shift_arg = false;
5090 }
49eab32e
JJ
5091 }
5092 else
5093 {
73fbfcad 5094 if (dump_enabled_p ())
78c60e3d 5095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5096 "operand mode requires invariant argument.\n");
49eab32e
JJ
5097 return false;
5098 }
5099
9dc3f7de 5100 /* Vector shifted by vector. */
49eab32e 5101 if (!scalar_shift_arg)
9dc3f7de
IR
5102 {
5103 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5104 if (dump_enabled_p ())
78c60e3d 5105 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5106 "vector/vector shift/rotate found.\n");
78c60e3d 5107
aa948027
JJ
5108 if (!op1_vectype)
5109 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5110 if (op1_vectype == NULL_TREE
5111 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5112 {
73fbfcad 5113 if (dump_enabled_p ())
78c60e3d
SS
5114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5115 "unusable type for last operand in"
e645e942 5116 " vector/vector shift/rotate.\n");
cede2577
JJ
5117 return false;
5118 }
9dc3f7de
IR
5119 }
5120 /* See if the machine has a vector shifted by scalar insn and if not
5121 then see if it has a vector shifted by vector insn. */
49eab32e 5122 else
9dc3f7de
IR
5123 {
5124 optab = optab_for_tree_code (code, vectype, optab_scalar);
5125 if (optab
5126 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5127 {
73fbfcad 5128 if (dump_enabled_p ())
78c60e3d 5129 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5130 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5131 }
5132 else
5133 {
5134 optab = optab_for_tree_code (code, vectype, optab_vector);
5135 if (optab
5136 && (optab_handler (optab, TYPE_MODE (vectype))
5137 != CODE_FOR_nothing))
5138 {
49eab32e
JJ
5139 scalar_shift_arg = false;
5140
73fbfcad 5141 if (dump_enabled_p ())
78c60e3d 5142 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5143 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5144
5145 /* Unlike the other binary operators, shifts/rotates have
5146 the rhs being int, instead of the same type as the lhs,
5147 so make sure the scalar is the right type if we are
aa948027 5148 dealing with vectors of long long/long/short/char. */
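 /* E.g. for "v = w << 2" where W is a vector of "short", the int constant 2
 is converted to "short" below so that it matches the vector element type
 (illustrative example only). */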
9dc3f7de
IR
5149 if (dt[1] == vect_constant_def)
5150 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5151 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5152 TREE_TYPE (op1)))
5153 {
5154 if (slp_node
5155 && TYPE_MODE (TREE_TYPE (vectype))
5156 != TYPE_MODE (TREE_TYPE (op1)))
5157 {
73fbfcad 5158 if (dump_enabled_p ())
78c60e3d
SS
5159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5160 "unusable type for last operand in"
e645e942 5161 " vector/vector shift/rotate.\n");
21c0a521 5162 return false;
aa948027
JJ
5163 }
5164 if (vec_stmt && !slp_node)
5165 {
5166 op1 = fold_convert (TREE_TYPE (vectype), op1);
5167 op1 = vect_init_vector (stmt, op1,
5168 TREE_TYPE (vectype), NULL);
5169 }
5170 }
9dc3f7de
IR
5171 }
5172 }
5173 }
9dc3f7de
IR
5174
5175 /* Supportable by target? */
5176 if (!optab)
5177 {
73fbfcad 5178 if (dump_enabled_p ())
78c60e3d 5179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5180 "no optab.\n");
9dc3f7de
IR
5181 return false;
5182 }
5183 vec_mode = TYPE_MODE (vectype);
5184 icode = (int) optab_handler (optab, vec_mode);
5185 if (icode == CODE_FOR_nothing)
5186 {
73fbfcad 5187 if (dump_enabled_p ())
78c60e3d 5188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5189 "op not supported by target.\n");
9dc3f7de 5190 /* Check only during analysis. */
cf098191 5191 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5192 || (!vec_stmt
5193 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5194 return false;
73fbfcad 5195 if (dump_enabled_p ())
e645e942
TJ
5196 dump_printf_loc (MSG_NOTE, vect_location,
5197 "proceeding using word mode.\n");
9dc3f7de
IR
5198 }
5199
5200 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5201 if (!vec_stmt
5202 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5203 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5204 {
73fbfcad 5205 if (dump_enabled_p ())
78c60e3d 5206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5207 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5208 return false;
5209 }
5210
5211 if (!vec_stmt) /* transformation not required. */
5212 {
5213 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5214 if (dump_enabled_p ())
e645e942
TJ
5215 dump_printf_loc (MSG_NOTE, vect_location,
5216 "=== vectorizable_shift ===\n");
4fc5ebf1 5217 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
9dc3f7de
IR
5218 return true;
5219 }
5220
67b8dbac 5221 /* Transform. */
9dc3f7de 5222
73fbfcad 5223 if (dump_enabled_p ())
78c60e3d 5224 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5225 "transform binary/unary operation.\n");
9dc3f7de
IR
5226
5227 /* Handle def. */
5228 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5229
9dc3f7de
IR
5230 prev_stmt_info = NULL;
5231 for (j = 0; j < ncopies; j++)
5232 {
5233 /* Handle uses. */
5234 if (j == 0)
5235 {
5236 if (scalar_shift_arg)
5237 {
5238 /* Vector shl and shr insn patterns can be defined with scalar
5239 operand 2 (shift operand). In this case, use constant or loop
5240 invariant op1 directly, without extending it to vector mode
5241 first. */
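 /* As an illustration only: if the target's shift pattern takes a scalar
 mode in operand 2, the invariant count is used as-is below and reused
 for every vector stmt; no vector of shift counts is built. */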
5242 optab_op2_mode = insn_data[icode].operand[2].mode;
5243 if (!VECTOR_MODE_P (optab_op2_mode))
5244 {
73fbfcad 5245 if (dump_enabled_p ())
78c60e3d 5246 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5247 "operand 1 using scalar mode.\n");
9dc3f7de 5248 vec_oprnd1 = op1;
8930f723 5249 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5250 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5251 if (slp_node)
5252 {
5253 /* Store vec_oprnd1 for every vector stmt to be created
5254 for SLP_NODE. We check during the analysis that all
5255 the shift arguments are the same.
5256 TODO: Allow different constants for different vector
5257 stmts generated for an SLP instance. */
5258 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5259 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5260 }
5261 }
5262 }
5263
5264 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5265 (a special case for certain kinds of vector shifts); otherwise,
5266 operand 1 should be of a vector type (the usual case). */
5267 if (vec_oprnd1)
5268 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5269 slp_node);
9dc3f7de
IR
5270 else
5271 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5272 slp_node);
9dc3f7de
IR
5273 }
5274 else
5275 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5276
5277 /* Arguments are ready. Create the new vector stmt. */
9771b263 5278 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5279 {
9771b263 5280 vop1 = vec_oprnds1[i];
0d0e4a03 5281 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5282 new_temp = make_ssa_name (vec_dest, new_stmt);
5283 gimple_assign_set_lhs (new_stmt, new_temp);
5284 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5285 if (slp_node)
9771b263 5286 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5287 }
5288
5289 if (slp_node)
5290 continue;
5291
5292 if (j == 0)
5293 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5294 else
5295 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5296 prev_stmt_info = vinfo_for_stmt (new_stmt);
5297 }
5298
9771b263
DN
5299 vec_oprnds0.release ();
5300 vec_oprnds1.release ();
9dc3f7de
IR
5301
5302 return true;
5303}
5304
5305
ebfd146a
IR
5306/* Function vectorizable_operation.
5307
16949072
RG
5308 Check if STMT performs a binary, unary or ternary operation that can
5309 be vectorized.
b8698a0f 5310 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5311 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5312 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5313
5314static bool
355fe088
TS
5315vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5316 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 5317{
00f07b86 5318 tree vec_dest;
ebfd146a 5319 tree scalar_dest;
16949072 5320 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5321 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5322 tree vectype;
ebfd146a 5323 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5324 enum tree_code code, orig_code;
ef4bddc2 5325 machine_mode vec_mode;
ebfd146a
IR
5326 tree new_temp;
5327 int op_type;
00f07b86 5328 optab optab;
523ba738 5329 bool target_support_p;
355fe088 5330 gimple *def_stmt;
16949072
RG
5331 enum vect_def_type dt[3]
5332 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5333 int ndts = 3;
355fe088 5334 gimple *new_stmt = NULL;
ebfd146a 5335 stmt_vec_info prev_stmt_info;
928686b1
RS
5336 poly_uint64 nunits_in;
5337 poly_uint64 nunits_out;
ebfd146a
IR
5338 tree vectype_out;
5339 int ncopies;
5340 int j, i;
6e1aa848
DN
5341 vec<tree> vec_oprnds0 = vNULL;
5342 vec<tree> vec_oprnds1 = vNULL;
5343 vec<tree> vec_oprnds2 = vNULL;
16949072 5344 tree vop0, vop1, vop2;
a70d6342 5345 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5346 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5347
a70d6342 5348 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5349 return false;
5350
66c16fd9
RB
5351 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5352 && ! vec_stmt)
ebfd146a
IR
5353 return false;
5354
5355 /* Is STMT a vectorizable binary/unary operation? */
5356 if (!is_gimple_assign (stmt))
5357 return false;
5358
5359 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5360 return false;
5361
0eb952ea 5362 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5363
1af4ebf5
MG
5364 /* For pointer addition and subtraction, we should use the normal
5365 plus and minus for the vector operation. */
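 /* E.g. a POINTER_PLUS_EXPR such as "p + 4" is vectorized using an ordinary
 PLUS_EXPR on the vector operands, and a POINTER_DIFF_EXPR such as "p - q"
 using a MINUS_EXPR (illustrative examples of the substitutions below). */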
ebfd146a
IR
5366 if (code == POINTER_PLUS_EXPR)
5367 code = PLUS_EXPR;
1af4ebf5
MG
5368 if (code == POINTER_DIFF_EXPR)
5369 code = MINUS_EXPR;
ebfd146a
IR
5370
5371 /* Support only unary or binary operations. */
5372 op_type = TREE_CODE_LENGTH (code);
16949072 5373 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5374 {
73fbfcad 5375 if (dump_enabled_p ())
78c60e3d 5376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5377 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5378 op_type);
ebfd146a
IR
5379 return false;
5380 }
5381
b690cc0f
RG
5382 scalar_dest = gimple_assign_lhs (stmt);
5383 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5384
7b7b1813
RG
5385 /* Most operations cannot handle bit-precision types without extra
5386 truncations. */
045c1278 5387 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5388 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5389 /* Exception are bitwise binary operations. */
5390 && code != BIT_IOR_EXPR
5391 && code != BIT_XOR_EXPR
5392 && code != BIT_AND_EXPR)
5393 {
73fbfcad 5394 if (dump_enabled_p ())
78c60e3d 5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5396 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5397 return false;
5398 }
5399
ebfd146a 5400 op0 = gimple_assign_rhs1 (stmt);
81c40241 5401 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5402 {
73fbfcad 5403 if (dump_enabled_p ())
78c60e3d 5404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5405 "use not simple.\n");
ebfd146a
IR
5406 return false;
5407 }
b690cc0f
RG
5408 /* If op0 is an external or constant def use a vector type with
5409 the same size as the output vector type. */
5410 if (!vectype)
b036c6c5
IE
5411 {
5412 /* For a boolean type we cannot determine the vectype from an
5413 invariant value (we don't know whether it is a vector
5414 of booleans or a vector of integers). We use the output
5415 vectype because operations on booleans don't change the
5416 type. */
2568d8a1 5417 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5418 {
2568d8a1 5419 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5420 {
5421 if (dump_enabled_p ())
5422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5423 "not supported operation on bool value.\n");
5424 return false;
5425 }
5426 vectype = vectype_out;
5427 }
5428 else
5429 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5430 }
7d8930a0
IR
5431 if (vec_stmt)
5432 gcc_assert (vectype);
5433 if (!vectype)
5434 {
73fbfcad 5435 if (dump_enabled_p ())
7d8930a0 5436 {
78c60e3d
SS
5437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5438 "no vectype for scalar type ");
5439 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5440 TREE_TYPE (op0));
e645e942 5441 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5442 }
5443
5444 return false;
5445 }
b690cc0f
RG
5446
5447 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5448 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5449 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5450 return false;
ebfd146a 5451
16949072 5452 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5453 {
5454 op1 = gimple_assign_rhs2 (stmt);
81c40241 5455 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5456 {
73fbfcad 5457 if (dump_enabled_p ())
78c60e3d 5458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5459 "use not simple.\n");
ebfd146a
IR
5460 return false;
5461 }
5462 }
16949072
RG
5463 if (op_type == ternary_op)
5464 {
5465 op2 = gimple_assign_rhs3 (stmt);
81c40241 5466 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5467 {
73fbfcad 5468 if (dump_enabled_p ())
78c60e3d 5469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5470 "use not simple.\n");
16949072
RG
5471 return false;
5472 }
5473 }
ebfd146a 5474
b690cc0f 5475 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5476 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5477 case of SLP. */
fce57248 5478 if (slp_node)
b690cc0f
RG
5479 ncopies = 1;
5480 else
e8f142e2 5481 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5482
5483 gcc_assert (ncopies >= 1);
5484
9dc3f7de 5485 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5486 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5487 || code == RROTATE_EXPR)
9dc3f7de 5488 return false;
ebfd146a 5489
ebfd146a 5490 /* Supportable by target? */
00f07b86
RH
5491
5492 vec_mode = TYPE_MODE (vectype);
5493 if (code == MULT_HIGHPART_EXPR)
523ba738 5494 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5495 else
5496 {
5497 optab = optab_for_tree_code (code, vectype, optab_default);
5498 if (!optab)
5deb57cb 5499 {
73fbfcad 5500 if (dump_enabled_p ())
78c60e3d 5501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5502 "no optab.\n");
00f07b86 5503 return false;
5deb57cb 5504 }
523ba738
RS
5505 target_support_p = (optab_handler (optab, vec_mode)
5506 != CODE_FOR_nothing);
5deb57cb
JJ
5507 }
5508
523ba738 5509 if (!target_support_p)
ebfd146a 5510 {
73fbfcad 5511 if (dump_enabled_p ())
78c60e3d 5512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5513 "op not supported by target.\n");
ebfd146a 5514 /* Check only during analysis. */
cf098191 5515 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5516 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5517 return false;
73fbfcad 5518 if (dump_enabled_p ())
e645e942
TJ
5519 dump_printf_loc (MSG_NOTE, vect_location,
5520 "proceeding using word mode.\n");
383d9c83
IR
5521 }
5522
4a00c761 5523 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5524 if (!VECTOR_MODE_P (vec_mode)
5525 && !vec_stmt
ca09abcb 5526 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5527 {
73fbfcad 5528 if (dump_enabled_p ())
78c60e3d 5529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5530 "not worthwhile without SIMD support.\n");
e34842c6 5531 return false;
7d8930a0 5532 }
ebfd146a 5533
ebfd146a
IR
5534 if (!vec_stmt) /* transformation not required. */
5535 {
4a00c761 5536 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5537 if (dump_enabled_p ())
78c60e3d 5538 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5539 "=== vectorizable_operation ===\n");
4fc5ebf1 5540 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5541 return true;
5542 }
5543
67b8dbac 5544 /* Transform. */
ebfd146a 5545
73fbfcad 5546 if (dump_enabled_p ())
78c60e3d 5547 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5548 "transform binary/unary operation.\n");
383d9c83 5549
ebfd146a 5550 /* Handle def. */
00f07b86 5551 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5552
0eb952ea
JJ
5553 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5554 vectors with unsigned elements, but the result is signed. So, we
5555 need to compute the MINUS_EXPR into a vectype temporary and
5556 VIEW_CONVERT_EXPR it into the final vectype_out result. */
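 /* Sketch of the idea: for "d[i] = p[i] - q[i]" with pointer operands the
 subtraction is carried out in VECTYPE (unsigned elements) and the result
 is then VIEW_CONVERTed to VECTYPE_OUT, whose elements are signed
 (e.g. ptrdiff_t); this is only an illustration of the code below. */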
5557 tree vec_cvt_dest = NULL_TREE;
5558 if (orig_code == POINTER_DIFF_EXPR)
5559 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5560
ebfd146a
IR
5561 /* In case the vectorization factor (VF) is bigger than the number
5562 of elements that we can fit in a vectype (nunits), we have to generate
5563 more than one vector stmt - i.e. - we need to "unroll" the
4a00c761
JJ
5564 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5565 from one copy of the vector stmt to the next, in the field
5566 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5567 stages to find the correct vector defs to be used when vectorizing
5568 stmts that use the defs of the current stmt. The example below
5569 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5570 we need to create 4 vectorized stmts):
5571
5572 before vectorization:
5573 RELATED_STMT VEC_STMT
5574 S1: x = memref - -
5575 S2: z = x + 1 - -
5576
5577 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5578 there):
5579 RELATED_STMT VEC_STMT
5580 VS1_0: vx0 = memref0 VS1_1 -
5581 VS1_1: vx1 = memref1 VS1_2 -
5582 VS1_2: vx2 = memref2 VS1_3 -
5583 VS1_3: vx3 = memref3 - -
5584 S1: x = load - VS1_0
5585 S2: z = x + 1 - -
5586
5587 step2: vectorize stmt S2 (done here):
5588 To vectorize stmt S2 we first need to find the relevant vector
5589 def for the first operand 'x'. This is, as usual, obtained from
5590 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5591 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5592 relevant vector def 'vx0'. Having found 'vx0' we can generate
5593 the vector stmt VS2_0, and as usual, record it in the
5594 STMT_VINFO_VEC_STMT of stmt S2.
5595 When creating the second copy (VS2_1), we obtain the relevant vector
5596 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5597 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5598 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5599 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5600 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5601 chain of stmts and pointers:
5602 RELATED_STMT VEC_STMT
5603 VS1_0: vx0 = memref0 VS1_1 -
5604 VS1_1: vx1 = memref1 VS1_2 -
5605 VS1_2: vx2 = memref2 VS1_3 -
5606 VS1_3: vx3 = memref3 - -
5607 S1: x = load - VS1_0
5608 VS2_0: vz0 = vx0 + v1 VS2_1 -
5609 VS2_1: vz1 = vx1 + v1 VS2_2 -
5610 VS2_2: vz2 = vx2 + v1 VS2_3 -
5611 VS2_3: vz3 = vx3 + v1 - -
5612 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5613
5614 prev_stmt_info = NULL;
5615 for (j = 0; j < ncopies; j++)
5616 {
5617 /* Handle uses. */
5618 if (j == 0)
4a00c761
JJ
5619 {
5620 if (op_type == binary_op || op_type == ternary_op)
5621 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5622 slp_node);
4a00c761
JJ
5623 else
5624 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5625 slp_node);
4a00c761 5626 if (op_type == ternary_op)
c392943c 5627 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
306b0c92 5628 slp_node);
4a00c761 5629 }
ebfd146a 5630 else
4a00c761
JJ
5631 {
5632 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5633 if (op_type == ternary_op)
5634 {
9771b263
DN
5635 tree vec_oprnd = vec_oprnds2.pop ();
5636 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5637 vec_oprnd));
4a00c761
JJ
5638 }
5639 }
5640
5641 /* Arguments are ready. Create the new vector stmt. */
9771b263 5642 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5643 {
4a00c761 5644 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5645 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5646 vop2 = ((op_type == ternary_op)
9771b263 5647 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5648 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5649 new_temp = make_ssa_name (vec_dest, new_stmt);
5650 gimple_assign_set_lhs (new_stmt, new_temp);
5651 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
5652 if (vec_cvt_dest)
5653 {
5654 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5655 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5656 new_temp);
5657 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5658 gimple_assign_set_lhs (new_stmt, new_temp);
5659 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5660 }
4a00c761 5661 if (slp_node)
9771b263 5662 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5663 }
5664
4a00c761
JJ
5665 if (slp_node)
5666 continue;
5667
5668 if (j == 0)
5669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5670 else
5671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5672 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5673 }
5674
9771b263
DN
5675 vec_oprnds0.release ();
5676 vec_oprnds1.release ();
5677 vec_oprnds2.release ();
ebfd146a 5678
ebfd146a
IR
5679 return true;
5680}
5681
f702e7d4 5682/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
5683
5684static void
f702e7d4 5685ensure_base_align (struct data_reference *dr)
c716e67f
XDL
5686{
5687 if (!dr->aux)
5688 return;
5689
52639a61 5690 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 5691 {
52639a61 5692 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5693
f702e7d4
RS
5694 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5695
428f0c67 5696 if (decl_in_symtab_p (base_decl))
f702e7d4 5697 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
5698 else
5699 {
f702e7d4 5700 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
5701 DECL_USER_ALIGN (base_decl) = 1;
5702 }
52639a61 5703 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
5704 }
5705}
5706
ebfd146a 5707
44fc7854
BE
5708/* Function get_group_alias_ptr_type.
5709
5710 Return the alias type for the group starting at FIRST_STMT. */
5711
5712static tree
5713get_group_alias_ptr_type (gimple *first_stmt)
5714{
5715 struct data_reference *first_dr, *next_dr;
5716 gimple *next_stmt;
5717
5718 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5719 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5720 while (next_stmt)
5721 {
5722 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5723 if (get_alias_set (DR_REF (first_dr))
5724 != get_alias_set (DR_REF (next_dr)))
5725 {
5726 if (dump_enabled_p ())
5727 dump_printf_loc (MSG_NOTE, vect_location,
5728 "conflicting alias set types.\n");
5729 return ptr_type_node;
5730 }
5731 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5732 }
5733 return reference_alias_ptr_type (DR_REF (first_dr));
5734}
5735
5736
ebfd146a
IR
5737/* Function vectorizable_store.
5738
b8698a0f
L
5739 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5740 can be vectorized.
5741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5742 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5744
5745static bool
355fe088 5746vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5747 slp_tree slp_node)
ebfd146a 5748{
ebfd146a
IR
5749 tree data_ref;
5750 tree op;
5751 tree vec_oprnd = NULL_TREE;
5752 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5753 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5754 tree elem_type;
ebfd146a 5755 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5756 struct loop *loop = NULL;
ef4bddc2 5757 machine_mode vec_mode;
ebfd146a
IR
5758 tree dummy;
5759 enum dr_alignment_support alignment_support_scheme;
355fe088 5760 gimple *def_stmt;
ebfd146a
IR
5761 enum vect_def_type dt;
5762 stmt_vec_info prev_stmt_info = NULL;
5763 tree dataref_ptr = NULL_TREE;
74bf76ed 5764 tree dataref_offset = NULL_TREE;
355fe088 5765 gimple *ptr_incr = NULL;
ebfd146a
IR
5766 int ncopies;
5767 int j;
2de001ee
RS
5768 gimple *next_stmt, *first_stmt;
5769 bool grouped_store;
ebfd146a 5770 unsigned int group_size, i;
6e1aa848
DN
5771 vec<tree> oprnds = vNULL;
5772 vec<tree> result_chain = vNULL;
ebfd146a 5773 bool inv_p;
09dfa495 5774 tree offset = NULL_TREE;
6e1aa848 5775 vec<tree> vec_oprnds = vNULL;
ebfd146a 5776 bool slp = (slp_node != NULL);
ebfd146a 5777 unsigned int vec_num;
a70d6342 5778 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5779 vec_info *vinfo = stmt_info->vinfo;
272c6793 5780 tree aggr_type;
134c85ca 5781 gather_scatter_info gs_info;
3bab6342 5782 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5783 gimple *new_stmt;
d9f21f6a 5784 poly_uint64 vf;
2de001ee 5785 vec_load_store_type vls_type;
44fc7854 5786 tree ref_type;
a70d6342 5787
a70d6342 5788 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5789 return false;
5790
66c16fd9
RB
5791 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5792 && ! vec_stmt)
ebfd146a
IR
5793 return false;
5794
5795 /* Is vectorizable store? */
5796
c3a8f964
RS
5797 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
5798 if (is_gimple_assign (stmt))
5799 {
5800 tree scalar_dest = gimple_assign_lhs (stmt);
5801 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5802 && is_pattern_stmt_p (stmt_info))
5803 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5804 if (TREE_CODE (scalar_dest) != ARRAY_REF
5805 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5806 && TREE_CODE (scalar_dest) != INDIRECT_REF
5807 && TREE_CODE (scalar_dest) != COMPONENT_REF
5808 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5809 && TREE_CODE (scalar_dest) != REALPART_EXPR
5810 && TREE_CODE (scalar_dest) != MEM_REF)
5811 return false;
5812 }
5813 else
5814 {
5815 gcall *call = dyn_cast <gcall *> (stmt);
5816 if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
5817 return false;
ebfd146a 5818
c3a8f964
RS
5819 if (slp_node != NULL)
5820 {
5821 if (dump_enabled_p ())
5822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5823 "SLP of masked stores not supported.\n");
5824 return false;
5825 }
5826
5827 ref_type = TREE_TYPE (gimple_call_arg (call, 1));
5828 mask = gimple_call_arg (call, 2);
5829 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
5830 return false;
5831 }
5832
5833 op = vect_get_store_rhs (stmt);
ebfd146a 5834
fce57248
RS
5835 /* Cannot have hybrid store SLP -- that would mean storing to the
5836 same location twice. */
5837 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5838
f4d09712 5839 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 5840 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5841
5842 if (loop_vinfo)
b17dc4d4
RB
5843 {
5844 loop = LOOP_VINFO_LOOP (loop_vinfo);
5845 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5846 }
5847 else
5848 vf = 1;
465c8c19
JJ
5849
5850 /* Multiple types in SLP are handled by creating the appropriate number of
5851 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5852 case of SLP. */
fce57248 5853 if (slp)
465c8c19
JJ
5854 ncopies = 1;
5855 else
e8f142e2 5856 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5857
5858 gcc_assert (ncopies >= 1);
5859
5860 /* FORNOW. This restriction should be relaxed. */
5861 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5862 {
5863 if (dump_enabled_p ())
5864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5865 "multiple types in nested loop.\n");
5866 return false;
5867 }
5868
3133c3b6 5869 if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
f4d09712
KY
5870 return false;
5871
272c6793 5872 elem_type = TREE_TYPE (vectype);
ebfd146a 5873 vec_mode = TYPE_MODE (vectype);
7b7b1813 5874
ebfd146a
IR
5875 if (!STMT_VINFO_DATA_REF (stmt_info))
5876 return false;
5877
2de001ee 5878 vect_memory_access_type memory_access_type;
7e11fc7f 5879 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
5880 &memory_access_type, &gs_info))
5881 return false;
3bab6342 5882
c3a8f964
RS
5883 if (mask)
5884 {
7e11fc7f
RS
5885 if (memory_access_type == VMAT_CONTIGUOUS)
5886 {
5887 if (!VECTOR_MODE_P (vec_mode)
5888 || !can_vec_mask_load_store_p (vec_mode,
5889 TYPE_MODE (mask_vectype), false))
5890 return false;
5891 }
5892 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
c3a8f964
RS
5893 {
5894 if (dump_enabled_p ())
5895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5896 "unsupported access type for masked store.\n");
5897 return false;
5898 }
c3a8f964
RS
5899 }
5900 else
5901 {
5902 /* FORNOW. In some cases we can vectorize even if the data type is not
5903 supported (e.g. array initialization with 0). */
5904 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5905 return false;
5906 }
5907
7cfb4d93
RS
5908 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5909 if (grouped_store)
5910 {
5911 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5912 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5913 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5914 }
5915 else
5916 {
5917 first_stmt = stmt;
5918 first_dr = dr;
5919 group_size = vec_num = 1;
5920 }
5921
ebfd146a
IR
5922 if (!vec_stmt) /* transformation not required. */
5923 {
2de001ee 5924 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
5925
5926 if (loop_vinfo
5927 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
5928 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
5929 memory_access_type);
5930
ebfd146a 5931 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5932 /* The SLP costs are calculated during SLP analysis. */
5933 if (!PURE_SLP_STMT (stmt_info))
9ce4345a
RS
5934 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
5935 vls_type, NULL, NULL, NULL);
ebfd146a
IR
5936 return true;
5937 }
2de001ee 5938 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 5939
67b8dbac 5940 /* Transform. */
ebfd146a 5941
f702e7d4 5942 ensure_base_align (dr);
c716e67f 5943
2de001ee 5944 if (memory_access_type == VMAT_GATHER_SCATTER)
3bab6342 5945 {
c3a8f964 5946 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 5947 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
5948 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5949 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5950 edge pe = loop_preheader_edge (loop);
5951 gimple_seq seq;
5952 basic_block new_bb;
5953 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
5954 poly_uint64 scatter_off_nunits
5955 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 5956
4d694b27 5957 if (known_eq (nunits, scatter_off_nunits))
3bab6342 5958 modifier = NONE;
4d694b27 5959 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 5960 {
3bab6342
AT
5961 modifier = WIDEN;
5962
4d694b27
RS
5963 /* Currently gathers and scatters are only supported for
5964 fixed-length vectors. */
5965 unsigned int count = scatter_off_nunits.to_constant ();
5966 vec_perm_builder sel (count, count, 1);
5967 for (i = 0; i < (unsigned int) count; ++i)
5968 sel.quick_push (i | (count / 2));
3bab6342 5969
4d694b27 5970 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
5971 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5972 indices);
3bab6342
AT
5973 gcc_assert (perm_mask != NULL_TREE);
5974 }
4d694b27 5975 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 5976 {
3bab6342
AT
5977 modifier = NARROW;
5978
4d694b27
RS
5979 /* Currently gathers and scatters are only supported for
5980 fixed-length vectors. */
5981 unsigned int count = nunits.to_constant ();
5982 vec_perm_builder sel (count, count, 1);
5983 for (i = 0; i < (unsigned int) count; ++i)
5984 sel.quick_push (i | (count / 2));
3bab6342 5985
4d694b27 5986 vec_perm_indices indices (sel, 2, count);
e3342de4 5987 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
5988 gcc_assert (perm_mask != NULL_TREE);
5989 ncopies *= 2;
5990 }
5991 else
5992 gcc_unreachable ();
5993
134c85ca 5994 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
5995 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5996 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5997 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5998 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5999 scaletype = TREE_VALUE (arglist);
6000
6001 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6002 && TREE_CODE (rettype) == VOID_TYPE);
6003
134c85ca 6004 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6005 if (!is_gimple_min_invariant (ptr))
6006 {
6007 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6008 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6009 gcc_assert (!new_bb);
6010 }
6011
6012 /* Currently we support only unconditional scatter stores,
6013 so mask should be all ones. */
6014 mask = build_int_cst (masktype, -1);
6015 mask = vect_init_vector (stmt, mask, masktype, NULL);
6016
134c85ca 6017 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6018
6019 prev_stmt_info = NULL;
6020 for (j = 0; j < ncopies; ++j)
6021 {
6022 if (j == 0)
6023 {
6024 src = vec_oprnd1
c3a8f964 6025 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6026 op = vec_oprnd0
134c85ca 6027 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6028 }
6029 else if (modifier != NONE && (j & 1))
6030 {
6031 if (modifier == WIDEN)
6032 {
6033 src = vec_oprnd1
6034 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6035 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6036 stmt, gsi);
6037 }
6038 else if (modifier == NARROW)
6039 {
6040 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6041 stmt, gsi);
6042 op = vec_oprnd0
134c85ca
RS
6043 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6044 vec_oprnd0);
3bab6342
AT
6045 }
6046 else
6047 gcc_unreachable ();
6048 }
6049 else
6050 {
6051 src = vec_oprnd1
6052 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6053 op = vec_oprnd0
134c85ca
RS
6054 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6055 vec_oprnd0);
3bab6342
AT
6056 }
6057
6058 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6059 {
928686b1
RS
6060 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6061 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6062 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
6063 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6064 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6066 src = var;
6067 }
6068
6069 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6070 {
928686b1
RS
6071 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6072 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6073 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
6074 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6075 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6076 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6077 op = var;
6078 }
6079
6080 new_stmt
134c85ca 6081 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
3bab6342
AT
6082
6083 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6084
6085 if (prev_stmt_info == NULL)
6086 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6087 else
6088 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6089 prev_stmt_info = vinfo_for_stmt (new_stmt);
6090 }
6091 return true;
6092 }
6093
0d0293ac 6094 if (grouped_store)
ebfd146a 6095 {
e14c1050 6096 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
6097
6098 /* FORNOW */
a70d6342 6099 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6100
6101 /* We vectorize all the stmts of the interleaving group when we
6102 reach the last stmt in the group. */
e14c1050
IR
6103 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6104 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
6105 && !slp)
6106 {
6107 *vec_stmt = NULL;
6108 return true;
6109 }
6110
6111 if (slp)
4b5caab7 6112 {
0d0293ac 6113 grouped_store = false;
4b5caab7
IR
6114 /* VEC_NUM is the number of vect stmts to be created for this
6115 group. */
6116 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 6117 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 6118 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 6119 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
c3a8f964 6120 op = vect_get_store_rhs (first_stmt);
4b5caab7 6121 }
ebfd146a 6122 else
4b5caab7
IR
6123 /* VEC_NUM is the number of vect stmts to be created for this
6124 group. */
ebfd146a 6125 vec_num = group_size;
44fc7854
BE
6126
6127 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a 6128 }
b8698a0f 6129 else
7cfb4d93 6130 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6131
73fbfcad 6132 if (dump_enabled_p ())
78c60e3d 6133 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6134 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6135
2de001ee
RS
6136 if (memory_access_type == VMAT_ELEMENTWISE
6137 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6138 {
6139 gimple_stmt_iterator incr_gsi;
6140 bool insert_after;
355fe088 6141 gimple *incr;
f2e2a985
MM
6142 tree offvar;
6143 tree ivstep;
6144 tree running_off;
6145 gimple_seq stmts = NULL;
6146 tree stride_base, stride_step, alias_off;
6147 tree vec_oprnd;
f502d50e 6148 unsigned int g;
4d694b27
RS
6149 /* Checked by get_load_store_type. */
6150 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6151
7cfb4d93 6152 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6153 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6154
6155 stride_base
6156 = fold_build_pointer_plus
f502d50e 6157 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 6158 size_binop (PLUS_EXPR,
f502d50e 6159 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
44fc7854 6160 convert_to_ptrofftype (DR_INIT (first_dr))));
f502d50e 6161 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
6162
6163 /* For a store with loop-invariant (but other than power-of-2)
6164 stride (i.e. not a grouped access) like so:
6165
6166 for (i = 0; i < n; i += stride)
6167 array[i] = ...;
6168
6169 we generate a new induction variable and new stores from
6170 the components of the (vectorized) rhs:
6171
6172 for (j = 0; ; j += VF*stride)
6173 vectemp = ...;
6174 tmp1 = vectemp[0];
6175 array[j] = tmp1;
6176 tmp2 = vectemp[1];
6177 array[j + stride] = tmp2;
6178 ...
6179 */
6180
4d694b27 6181 unsigned nstores = const_nunits;
b17dc4d4 6182 unsigned lnel = 1;
cee62fee 6183 tree ltype = elem_type;
04199738 6184 tree lvectype = vectype;
cee62fee
MM
6185 if (slp)
6186 {
4d694b27
RS
6187 if (group_size < const_nunits
6188 && const_nunits % group_size == 0)
b17dc4d4 6189 {
4d694b27 6190 nstores = const_nunits / group_size;
b17dc4d4
RB
6191 lnel = group_size;
6192 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6193 lvectype = vectype;
6194
6195 /* First check if vec_extract optab doesn't support extraction
6196 of vector elts directly. */
b397965c 6197 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6198 machine_mode vmode;
6199 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6200 || !VECTOR_MODE_P (vmode)
04199738
RB
6201 || (convert_optab_handler (vec_extract_optab,
6202 TYPE_MODE (vectype), vmode)
6203 == CODE_FOR_nothing))
6204 {
6205 /* Try to avoid emitting an extract of vector elements
6206 by performing the extracts using an integer type of the
6207 same size, extracting from a vector of those and then
6208 re-interpreting it as the original vector type if
6209 supported. */
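 /* Rough example of the idea, assuming the target supports it: for a
 group of 2 "char" elements within a V16QI vector, view the vector as
 V8HI and extract 8 HImode chunks, storing each 16-bit chunk directly
 instead of extracting 16 single QImode elements. */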
6210 unsigned lsize
6211 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6212 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6213 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6214 /* If we can't construct such a vector, fall back to
6215 element extracts from the original vector type and
6216 element size stores. */
4d694b27 6217 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6218 && VECTOR_MODE_P (vmode)
04199738
RB
6219 && (convert_optab_handler (vec_extract_optab,
6220 vmode, elmode)
6221 != CODE_FOR_nothing))
6222 {
4d694b27 6223 nstores = lnunits;
04199738
RB
6224 lnel = group_size;
6225 ltype = build_nonstandard_integer_type (lsize, 1);
6226 lvectype = build_vector_type (ltype, nstores);
6227 }
6228 /* Else fall back to vector extraction anyway.
6229 Fewer stores are more important than avoiding spilling
6230 of the vector we extract from. Compared to the
6231 construction case in vectorizable_load, no store-forwarding
6232 issue exists here for reasonable archs. */
6233 }
b17dc4d4 6234 }
4d694b27
RS
6235 else if (group_size >= const_nunits
6236 && group_size % const_nunits == 0)
b17dc4d4
RB
6237 {
6238 nstores = 1;
4d694b27 6239 lnel = const_nunits;
b17dc4d4 6240 ltype = vectype;
04199738 6241 lvectype = vectype;
b17dc4d4 6242 }
cee62fee
MM
6243 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6244 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6245 }
6246
f2e2a985
MM
6247 ivstep = stride_step;
6248 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6249 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6250
6251 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6252
6253 create_iv (stride_base, ivstep, NULL,
6254 loop, &incr_gsi, insert_after,
6255 &offvar, NULL);
6256 incr = gsi_stmt (incr_gsi);
310213d4 6257 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985
MM
6258
6259 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6260 if (stmts)
6261 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6262
6263 prev_stmt_info = NULL;
44fc7854 6264 alias_off = build_int_cst (ref_type, 0);
f502d50e
MM
6265 next_stmt = first_stmt;
6266 for (g = 0; g < group_size; g++)
f2e2a985 6267 {
f502d50e
MM
6268 running_off = offvar;
6269 if (g)
f2e2a985 6270 {
f502d50e
MM
6271 tree size = TYPE_SIZE_UNIT (ltype);
6272 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6273 size);
f502d50e 6274 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6275 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6276 running_off, pos);
f2e2a985 6277 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6278 running_off = newoff;
f502d50e 6279 }
b17dc4d4
RB
6280 unsigned int group_el = 0;
6281 unsigned HOST_WIDE_INT
6282 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6283 for (j = 0; j < ncopies; j++)
6284 {
c3a8f964 6285 /* We've set op and dt above, from vect_get_store_rhs,
f502d50e
MM
6286 and first_stmt == stmt. */
6287 if (j == 0)
6288 {
6289 if (slp)
6290 {
6291 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6292 slp_node);
f502d50e
MM
6293 vec_oprnd = vec_oprnds[0];
6294 }
6295 else
6296 {
c3a8f964 6297 op = vect_get_store_rhs (next_stmt);
81c40241 6298 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
6299 }
6300 }
f2e2a985 6301 else
f502d50e
MM
6302 {
6303 if (slp)
6304 vec_oprnd = vec_oprnds[j];
6305 else
c079cbac 6306 {
81c40241 6307 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
c079cbac
RB
6308 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6309 }
f502d50e 6310 }
04199738
RB
6311 /* Pun the vector to extract from if necessary. */
6312 if (lvectype != vectype)
6313 {
6314 tree tem = make_ssa_name (lvectype);
6315 gimple *pun
6316 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6317 lvectype, vec_oprnd));
6318 vect_finish_stmt_generation (stmt, pun, gsi);
6319 vec_oprnd = tem;
6320 }
f502d50e
MM
6321 for (i = 0; i < nstores; i++)
6322 {
6323 tree newref, newoff;
355fe088 6324 gimple *incr, *assign;
f502d50e
MM
6325 tree size = TYPE_SIZE (ltype);
6326 /* Extract the i'th component. */
6327 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6328 bitsize_int (i), size);
6329 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6330 size, pos);
6331
6332 elem = force_gimple_operand_gsi (gsi, elem, true,
6333 NULL_TREE, true,
6334 GSI_SAME_STMT);
6335
b17dc4d4
RB
6336 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6337 group_el * elsz);
f502d50e 6338 newref = build2 (MEM_REF, ltype,
b17dc4d4 6339 running_off, this_off);
f502d50e
MM
6340
6341 /* And store it to *running_off. */
6342 assign = gimple_build_assign (newref, elem);
6343 vect_finish_stmt_generation (stmt, assign, gsi);
6344
b17dc4d4
RB
6345 group_el += lnel;
6346 if (! slp
6347 || group_el == group_size)
6348 {
6349 newoff = copy_ssa_name (running_off, NULL);
6350 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6351 running_off, stride_step);
6352 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6353
b17dc4d4
RB
6354 running_off = newoff;
6355 group_el = 0;
6356 }
225ce44b
RB
6357 if (g == group_size - 1
6358 && !slp)
f502d50e
MM
6359 {
6360 if (j == 0 && i == 0)
225ce44b
RB
6361 STMT_VINFO_VEC_STMT (stmt_info)
6362 = *vec_stmt = assign;
f502d50e
MM
6363 else
6364 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6365 prev_stmt_info = vinfo_for_stmt (assign);
6366 }
6367 }
f2e2a985 6368 }
f502d50e 6369 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
6370 if (slp)
6371 break;
f2e2a985 6372 }
778dd3b6
RB
6373
6374 vec_oprnds.release ();
f2e2a985
MM
6375 return true;
6376 }
6377
8c681247 6378 auto_vec<tree> dr_chain (group_size);
9771b263 6379 oprnds.create (group_size);
ebfd146a 6380
720f5239 6381 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6382 gcc_assert (alignment_support_scheme);
7cfb4d93 6383 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
272c6793 6384 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6385 realignment. vect_supportable_dr_alignment always returns either
6386 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6387 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6388 && !mask
6389 && !masked_loop_p)
272c6793
RS
6390 || alignment_support_scheme == dr_aligned
6391 || alignment_support_scheme == dr_unaligned_supported);
6392
62da9e14
RS
6393 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6394 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6395 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6396
2de001ee 6397 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
6398 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6399 else
6400 aggr_type = vectype;
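  /* For store-lanes AGGR_TYPE is an array with room for VEC_NUM vectors
     (VEC_NUM * NUNITS elements), so that a single STORE_LANES or
     MASK_STORE_LANES call can write the whole interleaved group; otherwise
     each copy stores one vector.  */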
ebfd146a 6401
c3a8f964
RS
6402 if (mask)
6403 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6404
ebfd146a
IR
6405 /* In case the vectorization factor (VF) is bigger than the number
6406 of elements that we can fit in a vectype (nunits), we have to generate
6407 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 6408 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6409 vect_get_vec_def_for_copy_stmt. */
6410
0d0293ac 6411 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6412
6413 S1: &base + 2 = x2
6414 S2: &base = x0
6415 S3: &base + 1 = x1
6416 S4: &base + 3 = x3
6417
6418 We create vectorized stores starting from base address (the access of the
6419 first stmt in the chain (S2 in the above example), when the last store stmt
6420 of the chain (S4) is reached:
6421
6422 VS1: &base = vx2
6423 VS2: &base + vec_size*1 = vx0
6424 VS3: &base + vec_size*2 = vx1
6425 VS4: &base + vec_size*3 = vx3
6426
6427 Then permutation statements are generated:
6428
3fcc1b55
JJ
6429 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6430 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6431 ...
b8698a0f 6432
ebfd146a
IR
6433 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6434 (the order of the data-refs in the output of vect_permute_store_chain
6435 corresponds to the order of scalar stmts in the interleaving chain - see
6436 the documentation of vect_permute_store_chain()).
6437
6438 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6439 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6440 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6441 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6442 */
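  /* As an illustrative source-level example, a loop such as

       for (i = 0; i < n; i++)
	 {
	   out[i*4 + 2] = x2;
	   out[i*4 + 0] = x0;
	   out[i*4 + 1] = x1;
	   out[i*4 + 3] = x3;
	 }

     gives rise to a store group like S1..S4 above with GROUP_SIZE == 4.  */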
6443
6444 prev_stmt_info = NULL;
c3a8f964 6445 tree vec_mask = NULL_TREE;
7cfb4d93 6446 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
ebfd146a
IR
6447 for (j = 0; j < ncopies; j++)
6448 {
ebfd146a
IR
6449
6450 if (j == 0)
6451 {
6452 if (slp)
6453 {
6454 /* Get vectorized arguments for SLP_NODE. */
d092494c 6455 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6456 NULL, slp_node);
ebfd146a 6457
9771b263 6458 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6459 }
6460 else
6461 {
b8698a0f
L
6462 /* For interleaved stores we collect vectorized defs for all the
6463 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6464 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6465 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6466
0d0293ac 6467 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6468 OPRNDS are of size 1. */
b8698a0f 6469 next_stmt = first_stmt;
ebfd146a
IR
6470 for (i = 0; i < group_size; i++)
6471 {
b8698a0f
L
6472 /* Since gaps are not supported for interleaved stores,
6473 GROUP_SIZE is the exact number of stmts in the chain.
6474 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6475 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a 6476 iteration of the loop will be executed. */
c3a8f964 6477 op = vect_get_store_rhs (next_stmt);
81c40241 6478 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
6479 dr_chain.quick_push (vec_oprnd);
6480 oprnds.quick_push (vec_oprnd);
e14c1050 6481 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a 6482 }
c3a8f964
RS
6483 if (mask)
6484 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6485 mask_vectype);
ebfd146a
IR
6486 }
6487
 6488	  /* We should have caught mismatched types earlier.  */
6489 gcc_assert (useless_type_conversion_p (vectype,
6490 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6491 bool simd_lane_access_p
6492 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6493 if (simd_lane_access_p
6494 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6495 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6496 && integer_zerop (DR_OFFSET (first_dr))
6497 && integer_zerop (DR_INIT (first_dr))
6498 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6499 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
6500 {
6501 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 6502 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 6503 inv_p = false;
74bf76ed
JJ
6504 }
6505 else
6506 dataref_ptr
6507 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6508 simd_lane_access_p ? loop : NULL,
09dfa495 6509 offset, &dummy, gsi, &ptr_incr,
74bf76ed 6510 simd_lane_access_p, &inv_p);
a70d6342 6511 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 6512 }
b8698a0f 6513 else
ebfd146a 6514 {
b8698a0f
L
6515 /* For interleaved stores we created vectorized defs for all the
6516 defs stored in OPRNDS in the previous iteration (previous copy).
6517 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6518 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6519 next copy.
0d0293ac 6520 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6521 OPRNDS are of size 1. */
6522 for (i = 0; i < group_size; i++)
6523 {
9771b263 6524 op = oprnds[i];
81c40241 6525 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 6526 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
6527 dr_chain[i] = vec_oprnd;
6528 oprnds[i] = vec_oprnd;
ebfd146a 6529 }
c3a8f964
RS
6530 if (mask)
6531 {
6532 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
6533 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
6534 }
74bf76ed
JJ
6535 if (dataref_offset)
6536 dataref_offset
6537 = int_const_binop (PLUS_EXPR, dataref_offset,
6538 TYPE_SIZE_UNIT (aggr_type));
6539 else
6540 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6541 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
6542 }
6543
2de001ee 6544 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 6545 {
272c6793 6546 tree vec_array;
267d3070 6547
272c6793
RS
6548 /* Combine all the vectors into an array. */
6549 vec_array = create_vector_array (vectype, vec_num);
6550 for (i = 0; i < vec_num; i++)
c2d7ab2a 6551 {
9771b263 6552 vec_oprnd = dr_chain[i];
272c6793 6553 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 6554 }
b8698a0f 6555
7cfb4d93
RS
6556 tree final_mask = NULL;
6557 if (masked_loop_p)
6558 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
6559 if (vec_mask)
6560 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6561 vec_mask, gsi);
6562
7e11fc7f 6563 gcall *call;
7cfb4d93 6564 if (final_mask)
7e11fc7f
RS
6565 {
6566 /* Emit:
6567 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6568 VEC_ARRAY). */
6569 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6570 tree alias_ptr = build_int_cst (ref_type, align);
6571 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6572 dataref_ptr, alias_ptr,
7cfb4d93 6573 final_mask, vec_array);
7e11fc7f
RS
6574 }
6575 else
6576 {
6577 /* Emit:
6578 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6579 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6580 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6581 vec_array);
6582 gimple_call_set_lhs (call, data_ref);
6583 }
a844293d
RS
6584 gimple_call_set_nothrow (call, true);
6585 new_stmt = call;
267d3070 6586 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6587 }
6588 else
6589 {
6590 new_stmt = NULL;
0d0293ac 6591 if (grouped_store)
272c6793 6592 {
b6b9227d
JJ
6593 if (j == 0)
6594 result_chain.create (group_size);
272c6793
RS
6595 /* Permute. */
6596 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6597 &result_chain);
6598 }
c2d7ab2a 6599
272c6793
RS
6600 next_stmt = first_stmt;
6601 for (i = 0; i < vec_num; i++)
6602 {
644ffefd 6603 unsigned align, misalign;
272c6793 6604
7cfb4d93
RS
6605 tree final_mask = NULL_TREE;
6606 if (masked_loop_p)
6607 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6608 vectype, vec_num * j + i);
6609 if (vec_mask)
6610 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6611 vec_mask, gsi);
6612
272c6793
RS
6613 if (i > 0)
6614 /* Bump the vector pointer. */
6615 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6616 stmt, NULL_TREE);
6617
6618 if (slp)
9771b263 6619 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
6620 else if (grouped_store)
6621 /* For grouped stores vectorized defs are interleaved in
272c6793 6622 vect_permute_store_chain(). */
9771b263 6623 vec_oprnd = result_chain[i];
272c6793 6624
f702e7d4 6625 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 6626 if (aligned_access_p (first_dr))
644ffefd 6627 misalign = 0;
272c6793
RS
6628 else if (DR_MISALIGNMENT (first_dr) == -1)
6629 {
25f68d90 6630 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 6631 misalign = 0;
272c6793
RS
6632 }
6633 else
c3a8f964 6634 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
6635 if (dataref_offset == NULL_TREE
6636 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
6637 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6638 misalign);
c2d7ab2a 6639
62da9e14 6640 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6641 {
6642 tree perm_mask = perm_mask_for_reverse (vectype);
6643 tree perm_dest
c3a8f964 6644 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 6645 vectype);
b731b390 6646 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
6647
6648 /* Generate the permute statement. */
355fe088 6649 gimple *perm_stmt
0d0e4a03
JJ
6650 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6651 vec_oprnd, perm_mask);
09dfa495
BM
6652 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6653
6654 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6655 vec_oprnd = new_temp;
6656 }
6657
272c6793 6658 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 6659 if (final_mask)
c3a8f964
RS
6660 {
6661 align = least_bit_hwi (misalign | align);
6662 tree ptr = build_int_cst (ref_type, align);
6663 gcall *call
6664 = gimple_build_call_internal (IFN_MASK_STORE, 4,
6665 dataref_ptr, ptr,
7cfb4d93 6666 final_mask, vec_oprnd);
c3a8f964
RS
6667 gimple_call_set_nothrow (call, true);
6668 new_stmt = call;
6669 }
6670 else
6671 {
6672 data_ref = fold_build2 (MEM_REF, vectype,
6673 dataref_ptr,
6674 dataref_offset
6675 ? dataref_offset
6676 : build_int_cst (ref_type, 0));
6677 if (aligned_access_p (first_dr))
6678 ;
6679 else if (DR_MISALIGNMENT (first_dr) == -1)
6680 TREE_TYPE (data_ref)
6681 = build_aligned_type (TREE_TYPE (data_ref),
6682 align * BITS_PER_UNIT);
6683 else
6684 TREE_TYPE (data_ref)
6685 = build_aligned_type (TREE_TYPE (data_ref),
6686 TYPE_ALIGN (elem_type));
6687 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6688 }
272c6793 6689 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6690
6691 if (slp)
6692 continue;
6693
e14c1050 6694 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
6695 if (!next_stmt)
6696 break;
6697 }
ebfd146a 6698 }
1da0876c
RS
6699 if (!slp)
6700 {
6701 if (j == 0)
6702 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6703 else
6704 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6705 prev_stmt_info = vinfo_for_stmt (new_stmt);
6706 }
ebfd146a
IR
6707 }
6708
9771b263
DN
6709 oprnds.release ();
6710 result_chain.release ();
6711 vec_oprnds.release ();
ebfd146a
IR
6712
6713 return true;
6714}
6715
557be5a8
AL
6716/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6717 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 6718 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 6719 vect_gen_perm_mask_checked. */
a1e53f3f 6720
3fcc1b55 6721tree
4aae3cb3 6722vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 6723{
b00cb3bf 6724 tree mask_type;
a1e53f3f 6725
0ecc2b7d
RS
6726 poly_uint64 nunits = sel.length ();
6727 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
6728
6729 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 6730 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
6731}
6732
7ac7e286 6733/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 6734 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
6735
6736tree
4aae3cb3 6737vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 6738{
7ac7e286 6739 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
6740 return vect_gen_perm_mask_any (vectype, sel);
6741}
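
/* A typical caller of the two functions above first builds the permutation
   indices and only then asks for the mask; e.g. perm_mask_for_reverse pushes
   the series { nunits-1, nunits-2, ... } into a vec_perm_builder and hands
   the resulting vec_perm_indices to vect_gen_perm_mask_checked.  */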
6742
aec7ae7d
JJ
 6743/* Given vector variables X and Y that were generated for the scalar
6744 STMT, generate instructions to permute the vector elements of X and Y
6745 using permutation mask MASK_VEC, insert them at *GSI and return the
6746 permuted vector variable. */
a1e53f3f
L
6747
6748static tree
355fe088 6749permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6750 gimple_stmt_iterator *gsi)
a1e53f3f
L
6751{
6752 tree vectype = TREE_TYPE (x);
aec7ae7d 6753 tree perm_dest, data_ref;
355fe088 6754 gimple *perm_stmt;
a1e53f3f 6755
7ad429a4
RS
6756 tree scalar_dest = gimple_get_lhs (stmt);
6757 if (TREE_CODE (scalar_dest) == SSA_NAME)
6758 perm_dest = vect_create_destination_var (scalar_dest, vectype);
6759 else
6760 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 6761 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
6762
6763 /* Generate the permute statement. */
0d0e4a03 6764 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
6765 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6766
6767 return data_ref;
6768}
6769
6b916b36
RB
6770/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 6771   inserting them on the loop's preheader edge.  Returns true if we
6772 were successful in doing so (and thus STMT can be moved then),
6773 otherwise returns false. */
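/* This is used when hoisting invariant loads in vectorizable_load below:
   the statements computing the load address may themselves sit inside the
   loop body even though they only use values defined outside of it.  */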
6774
6775static bool
355fe088 6776hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
6777{
6778 ssa_op_iter i;
6779 tree op;
6780 bool any = false;
6781
6782 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6783 {
355fe088 6784 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6785 if (!gimple_nop_p (def_stmt)
6786 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6787 {
 6788	  /* Make sure we don't need to recurse.  While we could do
 6789	     so in simple cases, for more complex use webs we don't
 6790	     have an easy way to preserve stmt order to fulfil
6791 dependencies within them. */
6792 tree op2;
6793 ssa_op_iter i2;
d1417442
JJ
6794 if (gimple_code (def_stmt) == GIMPLE_PHI)
6795 return false;
6b916b36
RB
6796 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6797 {
355fe088 6798 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
6799 if (!gimple_nop_p (def_stmt2)
6800 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6801 return false;
6802 }
6803 any = true;
6804 }
6805 }
6806
6807 if (!any)
6808 return true;
6809
6810 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6811 {
355fe088 6812 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6813 if (!gimple_nop_p (def_stmt)
6814 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6815 {
6816 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6817 gsi_remove (&gsi, false);
6818 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6819 }
6820 }
6821
6822 return true;
6823}
6824
ebfd146a
IR
6825/* vectorizable_load.
6826
b8698a0f
L
 6827   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6828 can be vectorized.
6829 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6830 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6831 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6832
6833static bool
355fe088 6834vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6835 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
6836{
6837 tree scalar_dest;
6838 tree vec_dest = NULL;
6839 tree data_ref = NULL;
6840 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6841 stmt_vec_info prev_stmt_info;
ebfd146a 6842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6843 struct loop *loop = NULL;
ebfd146a 6844 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6845 bool nested_in_vect_loop = false;
c716e67f 6846 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6847 tree elem_type;
ebfd146a 6848 tree new_temp;
ef4bddc2 6849 machine_mode mode;
355fe088 6850 gimple *new_stmt = NULL;
ebfd146a
IR
6851 tree dummy;
6852 enum dr_alignment_support alignment_support_scheme;
6853 tree dataref_ptr = NULL_TREE;
74bf76ed 6854 tree dataref_offset = NULL_TREE;
355fe088 6855 gimple *ptr_incr = NULL;
ebfd146a 6856 int ncopies;
4d694b27
RS
6857 int i, j;
6858 unsigned int group_size;
6859 poly_uint64 group_gap_adj;
ebfd146a
IR
6860 tree msq = NULL_TREE, lsq;
6861 tree offset = NULL_TREE;
356bbc4c 6862 tree byte_offset = NULL_TREE;
ebfd146a 6863 tree realignment_token = NULL_TREE;
538dd0b7 6864 gphi *phi = NULL;
6e1aa848 6865 vec<tree> dr_chain = vNULL;
0d0293ac 6866 bool grouped_load = false;
355fe088 6867 gimple *first_stmt;
4f0a0218 6868 gimple *first_stmt_for_drptr = NULL;
ebfd146a
IR
6869 bool inv_p;
6870 bool compute_in_loop = false;
6871 struct loop *at_loop;
6872 int vec_num;
6873 bool slp = (slp_node != NULL);
6874 bool slp_perm = false;
a70d6342 6875 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 6876 poly_uint64 vf;
272c6793 6877 tree aggr_type;
134c85ca 6878 gather_scatter_info gs_info;
310213d4 6879 vec_info *vinfo = stmt_info->vinfo;
44fc7854 6880 tree ref_type;
a70d6342 6881
465c8c19
JJ
6882 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6883 return false;
6884
66c16fd9
RB
6885 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6886 && ! vec_stmt)
465c8c19
JJ
6887 return false;
6888
c3a8f964
RS
6889 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6890 if (is_gimple_assign (stmt))
6891 {
6892 scalar_dest = gimple_assign_lhs (stmt);
6893 if (TREE_CODE (scalar_dest) != SSA_NAME)
6894 return false;
465c8c19 6895
c3a8f964
RS
6896 tree_code code = gimple_assign_rhs_code (stmt);
6897 if (code != ARRAY_REF
6898 && code != BIT_FIELD_REF
6899 && code != INDIRECT_REF
6900 && code != COMPONENT_REF
6901 && code != IMAGPART_EXPR
6902 && code != REALPART_EXPR
6903 && code != MEM_REF
6904 && TREE_CODE_CLASS (code) != tcc_declaration)
6905 return false;
6906 }
6907 else
6908 {
6909 gcall *call = dyn_cast <gcall *> (stmt);
6910 if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
6911 return false;
465c8c19 6912
c3a8f964
RS
6913 scalar_dest = gimple_call_lhs (call);
6914 if (!scalar_dest)
6915 return false;
6916
6917 if (slp_node != NULL)
6918 {
6919 if (dump_enabled_p ())
6920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6921 "SLP of masked loads not supported.\n");
6922 return false;
6923 }
6924
6925 mask = gimple_call_arg (call, 2);
6926 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
6927 return false;
6928 }
465c8c19
JJ
6929
6930 if (!STMT_VINFO_DATA_REF (stmt_info))
6931 return false;
6932
6933 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 6934 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 6935
a70d6342
IR
6936 if (loop_vinfo)
6937 {
6938 loop = LOOP_VINFO_LOOP (loop_vinfo);
6939 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6940 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6941 }
6942 else
3533e503 6943 vf = 1;
ebfd146a
IR
6944
6945 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6946 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6947 case of SLP. */
fce57248 6948 if (slp)
ebfd146a
IR
6949 ncopies = 1;
6950 else
e8f142e2 6951 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
6952
6953 gcc_assert (ncopies >= 1);
6954
6955 /* FORNOW. This restriction should be relaxed. */
6956 if (nested_in_vect_loop && ncopies > 1)
6957 {
73fbfcad 6958 if (dump_enabled_p ())
78c60e3d 6959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6960 "multiple types in nested loop.\n");
ebfd146a
IR
6961 return false;
6962 }
6963
f2556b68
RB
6964 /* Invalidate assumptions made by dependence analysis when vectorization
6965 on the unrolled body effectively re-orders stmts. */
6966 if (ncopies > 1
6967 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
6968 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6969 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
6970 {
6971 if (dump_enabled_p ())
6972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6973 "cannot perform implicit CSE when unrolling "
6974 "with negative dependence distance\n");
6975 return false;
6976 }
6977
7b7b1813 6978 elem_type = TREE_TYPE (vectype);
947131ba 6979 mode = TYPE_MODE (vectype);
ebfd146a
IR
6980
6981 /* FORNOW. In some cases can vectorize even if data-type not supported
6982 (e.g. - data copies). */
947131ba 6983 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6984 {
73fbfcad 6985 if (dump_enabled_p ())
78c60e3d 6986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6987 "Aligned load, but unsupported type.\n");
ebfd146a
IR
6988 return false;
6989 }
6990
ebfd146a 6991 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6992 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6993 {
0d0293ac 6994 grouped_load = true;
ebfd146a 6995 /* FORNOW */
2de001ee
RS
6996 gcc_assert (!nested_in_vect_loop);
6997 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6998
e14c1050 6999 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72 7000 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
d5f035ea 7001
b1af7da6
RB
7002 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7003 slp_perm = true;
7004
f2556b68
RB
7005 /* Invalidate assumptions made by dependence analysis when vectorization
7006 on the unrolled body effectively re-orders stmts. */
7007 if (!PURE_SLP_STMT (stmt_info)
7008 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7009 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7010 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7011 {
7012 if (dump_enabled_p ())
7013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7014 "cannot perform implicit CSE when performing "
7015 "group loads with negative dependence distance\n");
7016 return false;
7017 }
96bb56b2
RB
7018
 7019      /* Similarly, when the stmt is a load that is both part of an SLP
 7020         instance and a loop-vectorized stmt via the same-dr mechanism,
 7021         we have to give up.  */
7022 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7023 && (STMT_SLP_TYPE (stmt_info)
7024 != STMT_SLP_TYPE (vinfo_for_stmt
7025 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7026 {
7027 if (dump_enabled_p ())
7028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7029 "conflicting SLP types for CSEd load\n");
7030 return false;
7031 }
ebfd146a 7032 }
7cfb4d93
RS
7033 else
7034 group_size = 1;
ebfd146a 7035
2de001ee 7036 vect_memory_access_type memory_access_type;
7e11fc7f 7037 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7038 &memory_access_type, &gs_info))
7039 return false;
a1e53f3f 7040
c3a8f964
RS
7041 if (mask)
7042 {
7043 if (memory_access_type == VMAT_CONTIGUOUS)
7044 {
7e11fc7f
RS
7045 machine_mode vec_mode = TYPE_MODE (vectype);
7046 if (!VECTOR_MODE_P (vec_mode)
7047 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7048 TYPE_MODE (mask_vectype), true))
7049 return false;
7050 }
7051 else if (memory_access_type == VMAT_GATHER_SCATTER)
7052 {
7053 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7054 tree masktype
7055 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7056 if (TREE_CODE (masktype) == INTEGER_TYPE)
7057 {
7058 if (dump_enabled_p ())
7059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7060 "masked gather with integer mask not"
7061 " supported.");
7062 return false;
7063 }
7064 }
7e11fc7f 7065 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
c3a8f964
RS
7066 {
7067 if (dump_enabled_p ())
7068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7069 "unsupported access type for masked load.\n");
7070 return false;
7071 }
7072 }
7073
ebfd146a
IR
7074 if (!vec_stmt) /* transformation not required. */
7075 {
2de001ee
RS
7076 if (!slp)
7077 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7078
7079 if (loop_vinfo
7080 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7081 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7082 memory_access_type);
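      /* The check above records the loop masks this load would need in a
	 fully-masked loop, or clears LOOP_VINFO_CAN_FULLY_MASK_P if such
	 masking is not possible for this access.  */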
7083
ebfd146a 7084 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2e8ab70c
RB
7085 /* The SLP costs are calculated during SLP analysis. */
7086 if (!PURE_SLP_STMT (stmt_info))
2de001ee 7087 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2e8ab70c 7088 NULL, NULL, NULL);
ebfd146a
IR
7089 return true;
7090 }
7091
2de001ee
RS
7092 if (!slp)
7093 gcc_assert (memory_access_type
7094 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7095
73fbfcad 7096 if (dump_enabled_p ())
78c60e3d 7097 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7098 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7099
67b8dbac 7100 /* Transform. */
ebfd146a 7101
f702e7d4 7102 ensure_base_align (dr);
c716e67f 7103
2de001ee 7104 if (memory_access_type == VMAT_GATHER_SCATTER)
aec7ae7d 7105 {
c3a8f964 7106 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
aec7ae7d
JJ
7107 return true;
7108 }
2de001ee
RS
7109
7110 if (memory_access_type == VMAT_ELEMENTWISE
7111 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7112 {
7113 gimple_stmt_iterator incr_gsi;
7114 bool insert_after;
355fe088 7115 gimple *incr;
7d75abc8 7116 tree offvar;
7d75abc8
MM
7117 tree ivstep;
7118 tree running_off;
9771b263 7119 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 7120 gimple_seq stmts = NULL;
14ac6aa2 7121 tree stride_base, stride_step, alias_off;
4d694b27
RS
7122 /* Checked by get_load_store_type. */
7123 unsigned int const_nunits = nunits.to_constant ();
14ac6aa2 7124
7cfb4d93 7125 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7126 gcc_assert (!nested_in_vect_loop);
7d75abc8 7127
f502d50e 7128 if (slp && grouped_load)
44fc7854
BE
7129 {
7130 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7131 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7132 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7133 ref_type = get_group_alias_ptr_type (first_stmt);
7134 }
ab313a8c 7135 else
44fc7854
BE
7136 {
7137 first_stmt = stmt;
7138 first_dr = dr;
7139 group_size = 1;
7140 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7141 }
ab313a8c 7142
14ac6aa2
RB
7143 stride_base
7144 = fold_build_pointer_plus
ab313a8c 7145 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7146 size_binop (PLUS_EXPR,
ab313a8c
RB
7147 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7148 convert_to_ptrofftype (DR_INIT (first_dr))));
7149 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7150
7151 /* For a load with loop-invariant (but other than power-of-2)
7152 stride (i.e. not a grouped access) like so:
7153
7154 for (i = 0; i < n; i += stride)
7155 ... = array[i];
7156
7157 we generate a new induction variable and new accesses to
7158 form a new vector (or vectors, depending on ncopies):
7159
7160 for (j = 0; ; j += VF*stride)
7161 tmp1 = array[j];
7162 tmp2 = array[j + stride];
7163 ...
7164 vectemp = {tmp1, tmp2, ...}
7165 */
7166
ab313a8c
RB
7167 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7168 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7169
7170 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7171
ab313a8c 7172 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7d75abc8
MM
7173 loop, &incr_gsi, insert_after,
7174 &offvar, NULL);
7175 incr = gsi_stmt (incr_gsi);
310213d4 7176 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7177
ab313a8c
RB
7178 stride_step = force_gimple_operand (unshare_expr (stride_step),
7179 &stmts, true, NULL_TREE);
7d75abc8
MM
7180 if (stmts)
7181 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7182
7183 prev_stmt_info = NULL;
7184 running_off = offvar;
44fc7854 7185 alias_off = build_int_cst (ref_type, 0);
4d694b27 7186 int nloads = const_nunits;
e09b4c37 7187 int lnel = 1;
7b5fc413 7188 tree ltype = TREE_TYPE (vectype);
ea60dd34 7189 tree lvectype = vectype;
b266b968 7190 auto_vec<tree> dr_chain;
2de001ee 7191 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7192 {
4d694b27 7193 if (group_size < const_nunits)
e09b4c37 7194 {
ff03930a
JJ
7195 /* First check if vec_init optab supports construction from
7196 vector elts directly. */
b397965c 7197 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7198 machine_mode vmode;
7199 if (mode_for_vector (elmode, group_size).exists (&vmode)
7200 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7201 && (convert_optab_handler (vec_init_optab,
7202 TYPE_MODE (vectype), vmode)
7203 != CODE_FOR_nothing))
ea60dd34 7204 {
4d694b27 7205 nloads = const_nunits / group_size;
ea60dd34 7206 lnel = group_size;
ff03930a
JJ
7207 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7208 }
7209 else
7210 {
7211 /* Otherwise avoid emitting a constructor of vector elements
7212 by performing the loads using an integer type of the same
7213 size, constructing a vector of those and then
7214 re-interpreting it as the original vector type.
7215 This avoids a huge runtime penalty due to the general
7216 inability to perform store forwarding from smaller stores
7217 to a larger load. */
7218 unsigned lsize
7219 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7220 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7221 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7222 /* If we can't construct such a vector fall back to
7223 element loads of the original vector type. */
4d694b27 7224 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7225 && VECTOR_MODE_P (vmode)
ff03930a
JJ
7226 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7227 != CODE_FOR_nothing))
7228 {
4d694b27 7229 nloads = lnunits;
ff03930a
JJ
7230 lnel = group_size;
7231 ltype = build_nonstandard_integer_type (lsize, 1);
7232 lvectype = build_vector_type (ltype, nloads);
7233 }
ea60dd34 7234 }
e09b4c37 7235 }
2de001ee 7236 else
e09b4c37 7237 {
ea60dd34 7238 nloads = 1;
4d694b27 7239 lnel = const_nunits;
e09b4c37 7240 ltype = vectype;
e09b4c37 7241 }
2de001ee
RS
7242 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7243 }
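      /* For example, with V8HImode vectors and GROUP_SIZE == 2, if the target
	 cannot build the vector directly from two-element pieces, the fallback
	 above loads each group as one 32-bit integer, builds a four-element
	 integer vector from those and view-converts it back to the original
	 vector type further below.  */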
7244 if (slp)
7245 {
66c16fd9
RB
7246 /* For SLP permutation support we need to load the whole group,
7247 not only the number of vector stmts the permutation result
7248 fits in. */
b266b968 7249 if (slp_perm)
66c16fd9 7250 {
d9f21f6a
RS
7251 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7252 variable VF. */
7253 unsigned int const_vf = vf.to_constant ();
4d694b27 7254 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7255 dr_chain.create (ncopies);
7256 }
7257 else
7258 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7259 }
4d694b27 7260 unsigned int group_el = 0;
e09b4c37
RB
7261 unsigned HOST_WIDE_INT
7262 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7263 for (j = 0; j < ncopies; j++)
7264 {
7b5fc413 7265 if (nloads > 1)
e09b4c37
RB
7266 vec_alloc (v, nloads);
7267 for (i = 0; i < nloads; i++)
7b5fc413 7268 {
e09b4c37
RB
7269 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7270 group_el * elsz);
7271 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7272 build2 (MEM_REF, ltype,
7273 running_off, this_off));
7274 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7275 if (nloads > 1)
7276 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7277 gimple_assign_lhs (new_stmt));
7278
7279 group_el += lnel;
7280 if (! slp
7281 || group_el == group_size)
7b5fc413 7282 {
e09b4c37
RB
7283 tree newoff = copy_ssa_name (running_off);
7284 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7285 running_off, stride_step);
7b5fc413
RB
7286 vect_finish_stmt_generation (stmt, incr, gsi);
7287
7288 running_off = newoff;
e09b4c37 7289 group_el = 0;
7b5fc413 7290 }
7b5fc413 7291 }
e09b4c37 7292 if (nloads > 1)
7d75abc8 7293 {
ea60dd34
RB
7294 tree vec_inv = build_constructor (lvectype, v);
7295 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7296 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7297 if (lvectype != vectype)
7298 {
7299 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7300 VIEW_CONVERT_EXPR,
7301 build1 (VIEW_CONVERT_EXPR,
7302 vectype, new_temp));
7303 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7304 }
7d75abc8
MM
7305 }
7306
7b5fc413 7307 if (slp)
b266b968 7308 {
b266b968
RB
7309 if (slp_perm)
7310 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7311 else
7312 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7313 }
7d75abc8 7314 else
225ce44b
RB
7315 {
7316 if (j == 0)
7317 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7318 else
7319 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7320 prev_stmt_info = vinfo_for_stmt (new_stmt);
7321 }
7d75abc8 7322 }
b266b968 7323 if (slp_perm)
29afecdf
RB
7324 {
7325 unsigned n_perms;
7326 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7327 slp_node_instance, false, &n_perms);
7328 }
7d75abc8
MM
7329 return true;
7330 }
aec7ae7d 7331
0d0293ac 7332 if (grouped_load)
ebfd146a 7333 {
e14c1050 7334 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7335 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7336 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7337 without permutation. */
7338 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7339 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7340 /* For BB vectorization always use the first stmt to base
7341 the data ref pointer on. */
7342 if (bb_vinfo)
7343 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7344
ebfd146a 7345 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7346 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7347 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7348 ??? But we can only do so if there is exactly one
7349 as we have no way to get at the rest. Leave the CSE
7350 opportunity alone.
7351 ??? With the group load eventually participating
7352 in multiple different permutations (having multiple
7353 slp nodes which refer to the same group) the CSE
7354 is even wrong code. See PR56270. */
7355 && !slp)
ebfd146a
IR
7356 {
7357 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7358 return true;
7359 }
7360 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7361 group_gap_adj = 0;
ebfd146a
IR
7362
7363 /* VEC_NUM is the number of vect stmts to be created for this group. */
7364 if (slp)
7365 {
0d0293ac 7366 grouped_load = false;
91ff1504
RB
7367 /* For SLP permutation support we need to load the whole group,
7368 not only the number of vector stmts the permutation result
7369 fits in. */
7370 if (slp_perm)
b267968e 7371 {
d9f21f6a
RS
7372 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7373 variable VF. */
7374 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7375 unsigned int const_nunits = nunits.to_constant ();
7376 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7377 group_gap_adj = vf * group_size - nunits * vec_num;
7378 }
91ff1504 7379 else
b267968e
RB
7380 {
7381 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7382 group_gap_adj
7383 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7384 }
a70d6342 7385 }
ebfd146a 7386 else
9b999e8c 7387 vec_num = group_size;
44fc7854
BE
7388
7389 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7390 }
7391 else
7392 {
7393 first_stmt = stmt;
7394 first_dr = dr;
7395 group_size = vec_num = 1;
9b999e8c 7396 group_gap_adj = 0;
44fc7854 7397 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7398 }
7399
720f5239 7400 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7401 gcc_assert (alignment_support_scheme);
7cfb4d93
RS
7402 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7403 /* Targets with store-lane instructions must not require explicit
7404 realignment. vect_supportable_dr_alignment always returns either
7405 dr_aligned or dr_unaligned_supported for masked operations. */
7406 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7407 && !mask
7408 && !masked_loop_p)
272c6793
RS
7409 || alignment_support_scheme == dr_aligned
7410 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7411
7412 /* In case the vectorization factor (VF) is bigger than the number
7413 of elements that we can fit in a vectype (nunits), we have to generate
7414 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7415 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7416 from one copy of the vector stmt to the next, in the field
ff802fa1 7417 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7418 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7419 stmts that use the defs of the current stmt. The example below
7420 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7421 need to create 4 vectorized stmts):
ebfd146a
IR
7422
7423 before vectorization:
7424 RELATED_STMT VEC_STMT
7425 S1: x = memref - -
7426 S2: z = x + 1 - -
7427
7428 step 1: vectorize stmt S1:
7429 We first create the vector stmt VS1_0, and, as usual, record a
7430 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7431 Next, we create the vector stmt VS1_1, and record a pointer to
7432 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7433 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7434 stmts and pointers:
7435 RELATED_STMT VEC_STMT
7436 VS1_0: vx0 = memref0 VS1_1 -
7437 VS1_1: vx1 = memref1 VS1_2 -
7438 VS1_2: vx2 = memref2 VS1_3 -
7439 VS1_3: vx3 = memref3 - -
7440 S1: x = load - VS1_0
7441 S2: z = x + 1 - -
7442
b8698a0f
L
7443 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7444 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
7445 stmt S2. */
7446
0d0293ac 7447 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7448
7449 S1: x2 = &base + 2
7450 S2: x0 = &base
7451 S3: x1 = &base + 1
7452 S4: x3 = &base + 3
7453
b8698a0f 7454 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7455 starting from the access of the first stmt of the chain:
7456
7457 VS1: vx0 = &base
7458 VS2: vx1 = &base + vec_size*1
7459 VS3: vx3 = &base + vec_size*2
7460 VS4: vx4 = &base + vec_size*3
7461
7462 Then permutation statements are generated:
7463
e2c83630
RH
7464 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7465 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7466 ...
7467
7468 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7469 (the order of the data-refs in the output of vect_permute_load_chain
7470 corresponds to the order of scalar stmts in the interleaving chain - see
7471 the documentation of vect_permute_load_chain()).
7472 The generation of permutation stmts and recording them in
0d0293ac 7473 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7474
b8698a0f 7475 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
7476 permutation stmts above are created for every copy. The result vector
7477 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7478 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
7479
7480 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7481 on a target that supports unaligned accesses (dr_unaligned_supported)
7482 we generate the following code:
7483 p = initial_addr;
7484 indx = 0;
7485 loop {
7486 p = p + indx * vectype_size;
7487 vec_dest = *(p);
7488 indx = indx + 1;
7489 }
7490
7491 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7492 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
7493 then generate the following code, in which the data in each iteration is
7494 obtained by two vector loads, one from the previous iteration, and one
7495 from the current iteration:
7496 p1 = initial_addr;
7497 msq_init = *(floor(p1))
7498 p2 = initial_addr + VS - 1;
7499 realignment_token = call target_builtin;
7500 indx = 0;
7501 loop {
7502 p2 = p2 + indx * vectype_size
7503 lsq = *(floor(p2))
7504 vec_dest = realign_load (msq, lsq, realignment_token)
7505 indx = indx + 1;
7506 msq = lsq;
7507 } */
7508
7509 /* If the misalignment remains the same throughout the execution of the
7510 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7511 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
7512 This can only occur when vectorizing memory accesses in the inner-loop
7513 nested within an outer-loop that is being vectorized. */
7514
d1e4b493 7515 if (nested_in_vect_loop
cf098191
RS
7516 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7517 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
7518 {
7519 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7520 compute_in_loop = true;
7521 }
7522
7523 if ((alignment_support_scheme == dr_explicit_realign_optimized
7524 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7525 && !compute_in_loop)
ebfd146a
IR
7526 {
7527 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7528 alignment_support_scheme, NULL_TREE,
7529 &at_loop);
7530 if (alignment_support_scheme == dr_explicit_realign_optimized)
7531 {
538dd0b7 7532 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
7533 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7534 size_one_node);
ebfd146a
IR
7535 }
7536 }
7537 else
7538 at_loop = loop;
7539
62da9e14 7540 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
7541 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7542
2de001ee 7543 if (memory_access_type == VMAT_LOAD_STORE_LANES)
272c6793
RS
7544 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7545 else
7546 aggr_type = vectype;
7547
c3a8f964 7548 tree vec_mask = NULL_TREE;
ebfd146a 7549 prev_stmt_info = NULL;
4d694b27 7550 poly_uint64 group_elt = 0;
7cfb4d93 7551 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
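  /* When the loop is fully masked, the mask for copy J below is taken from
     MASKS and combined (via prepare_load_store_mask) with any explicit
     VEC_MASK to form the FINAL_MASK given to the IFN_MASK_* calls.  */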
ebfd146a 7552 for (j = 0; j < ncopies; j++)
b8698a0f 7553 {
272c6793 7554 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7555 if (j == 0)
74bf76ed
JJ
7556 {
7557 bool simd_lane_access_p
7558 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7559 if (simd_lane_access_p
7560 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7561 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7562 && integer_zerop (DR_OFFSET (first_dr))
7563 && integer_zerop (DR_INIT (first_dr))
7564 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7565 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
7566 && (alignment_support_scheme == dr_aligned
7567 || alignment_support_scheme == dr_unaligned_supported))
7568 {
7569 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7570 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7571 inv_p = false;
74bf76ed 7572 }
4f0a0218
RB
7573 else if (first_stmt_for_drptr
7574 && first_stmt != first_stmt_for_drptr)
7575 {
7576 dataref_ptr
7577 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7578 at_loop, offset, &dummy, gsi,
7579 &ptr_incr, simd_lane_access_p,
7580 &inv_p, byte_offset);
7581 /* Adjust the pointer by the difference to first_stmt. */
7582 data_reference_p ptrdr
7583 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7584 tree diff = fold_convert (sizetype,
7585 size_binop (MINUS_EXPR,
7586 DR_INIT (first_dr),
7587 DR_INIT (ptrdr)));
7588 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7589 stmt, diff);
7590 }
74bf76ed
JJ
7591 else
7592 dataref_ptr
7593 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7594 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
7595 simd_lane_access_p, &inv_p,
7596 byte_offset);
c3a8f964
RS
7597 if (mask)
7598 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7599 mask_vectype);
74bf76ed 7600 }
ebfd146a 7601 else
c3a8f964
RS
7602 {
7603 if (dataref_offset)
7604 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7605 TYPE_SIZE_UNIT (aggr_type));
7606 else
7607 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7608 TYPE_SIZE_UNIT (aggr_type));
7609 if (mask)
7610 {
7611 gimple *def_stmt;
7612 vect_def_type dt;
7613 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
7614 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
7615 }
7616 }
ebfd146a 7617
0d0293ac 7618 if (grouped_load || slp_perm)
9771b263 7619 dr_chain.create (vec_num);
5ce1ee7f 7620
2de001ee 7621 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7622 {
272c6793
RS
7623 tree vec_array;
7624
7625 vec_array = create_vector_array (vectype, vec_num);
7626
7cfb4d93
RS
7627 tree final_mask = NULL_TREE;
7628 if (masked_loop_p)
7629 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
7630 if (vec_mask)
7631 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7632 vec_mask, gsi);
7633
7e11fc7f 7634 gcall *call;
7cfb4d93 7635 if (final_mask)
7e11fc7f
RS
7636 {
7637 /* Emit:
7638 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
7639 VEC_MASK). */
7640 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7641 tree alias_ptr = build_int_cst (ref_type, align);
7642 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
7643 dataref_ptr, alias_ptr,
7cfb4d93 7644 final_mask);
7e11fc7f
RS
7645 }
7646 else
7647 {
7648 /* Emit:
7649 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7650 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7651 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7652 }
a844293d
RS
7653 gimple_call_set_lhs (call, vec_array);
7654 gimple_call_set_nothrow (call, true);
7655 new_stmt = call;
272c6793 7656 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7657
272c6793
RS
7658 /* Extract each vector into an SSA_NAME. */
7659 for (i = 0; i < vec_num; i++)
ebfd146a 7660 {
272c6793
RS
7661 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7662 vec_array, i);
9771b263 7663 dr_chain.quick_push (new_temp);
272c6793
RS
7664 }
7665
7666 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7667 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
7668 }
7669 else
7670 {
7671 for (i = 0; i < vec_num; i++)
7672 {
7cfb4d93
RS
7673 tree final_mask = NULL_TREE;
7674 if (masked_loop_p
7675 && memory_access_type != VMAT_INVARIANT)
7676 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
7677 vectype, vec_num * j + i);
7678 if (vec_mask)
7679 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7680 vec_mask, gsi);
7681
272c6793
RS
7682 if (i > 0)
7683 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7684 stmt, NULL_TREE);
7685
7686 /* 2. Create the vector-load in the loop. */
7687 switch (alignment_support_scheme)
7688 {
7689 case dr_aligned:
7690 case dr_unaligned_supported:
be1ac4ec 7691 {
644ffefd
MJ
7692 unsigned int align, misalign;
7693
f702e7d4 7694 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
7695 if (alignment_support_scheme == dr_aligned)
7696 {
7697 gcc_assert (aligned_access_p (first_dr));
644ffefd 7698 misalign = 0;
272c6793
RS
7699 }
7700 else if (DR_MISALIGNMENT (first_dr) == -1)
7701 {
25f68d90 7702 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7703 misalign = 0;
272c6793
RS
7704 }
7705 else
c3a8f964 7706 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7707 if (dataref_offset == NULL_TREE
7708 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7709 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7710 align, misalign);
c3a8f964 7711
7cfb4d93 7712 if (final_mask)
c3a8f964
RS
7713 {
7714 align = least_bit_hwi (misalign | align);
7715 tree ptr = build_int_cst (ref_type, align);
7716 gcall *call
7717 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
7718 dataref_ptr, ptr,
7cfb4d93 7719 final_mask);
c3a8f964
RS
7720 gimple_call_set_nothrow (call, true);
7721 new_stmt = call;
7722 data_ref = NULL_TREE;
7723 }
7724 else
7725 {
7726 data_ref
7727 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7728 dataref_offset
7729 ? dataref_offset
7730 : build_int_cst (ref_type, 0));
7731 if (alignment_support_scheme == dr_aligned)
7732 ;
7733 else if (DR_MISALIGNMENT (first_dr) == -1)
7734 TREE_TYPE (data_ref)
7735 = build_aligned_type (TREE_TYPE (data_ref),
7736 align * BITS_PER_UNIT);
7737 else
7738 TREE_TYPE (data_ref)
7739 = build_aligned_type (TREE_TYPE (data_ref),
7740 TYPE_ALIGN (elem_type));
7741 }
272c6793 7742 break;
be1ac4ec 7743 }
272c6793 7744 case dr_explicit_realign:
267d3070 7745 {
272c6793 7746 tree ptr, bump;
272c6793 7747
d88981fc 7748 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
7749
7750 if (compute_in_loop)
7751 msq = vect_setup_realignment (first_stmt, gsi,
7752 &realignment_token,
7753 dr_explicit_realign,
7754 dataref_ptr, NULL);
7755
aed93b23
RB
7756 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7757 ptr = copy_ssa_name (dataref_ptr);
7758 else
7759 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 7760 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
7761 new_stmt = gimple_build_assign
7762 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
7763 build_int_cst
7764 (TREE_TYPE (dataref_ptr),
f702e7d4 7765 -(HOST_WIDE_INT) align));
272c6793
RS
7766 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7767 data_ref
7768 = build2 (MEM_REF, vectype, ptr,
44fc7854 7769 build_int_cst (ref_type, 0));
272c6793
RS
7770 vec_dest = vect_create_destination_var (scalar_dest,
7771 vectype);
7772 new_stmt = gimple_build_assign (vec_dest, data_ref);
7773 new_temp = make_ssa_name (vec_dest, new_stmt);
7774 gimple_assign_set_lhs (new_stmt, new_temp);
7775 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7776 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7778 msq = new_temp;
7779
d88981fc 7780 bump = size_binop (MULT_EXPR, vs,
7b7b1813 7781 TYPE_SIZE_UNIT (elem_type));
d88981fc 7782 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 7783 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
7784 new_stmt = gimple_build_assign
7785 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 7786 build_int_cst
f702e7d4 7787 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 7788 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
7789 gimple_assign_set_lhs (new_stmt, ptr);
7790 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7791 data_ref
7792 = build2 (MEM_REF, vectype, ptr,
44fc7854 7793 build_int_cst (ref_type, 0));
272c6793 7794 break;
267d3070 7795 }
272c6793 7796 case dr_explicit_realign_optimized:
f702e7d4
RS
7797 {
7798 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7799 new_temp = copy_ssa_name (dataref_ptr);
7800 else
7801 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7802 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7803 new_stmt = gimple_build_assign
7804 (new_temp, BIT_AND_EXPR, dataref_ptr,
7805 build_int_cst (TREE_TYPE (dataref_ptr),
7806 -(HOST_WIDE_INT) align));
7807 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7808 data_ref
7809 = build2 (MEM_REF, vectype, new_temp,
7810 build_int_cst (ref_type, 0));
7811 break;
7812 }
272c6793
RS
7813 default:
7814 gcc_unreachable ();
7815 }
ebfd146a 7816 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
7817 /* DATA_REF is null if we've already built the statement. */
7818 if (data_ref)
7819 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a 7820 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 7821 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
7822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7823
272c6793
RS
7824 /* 3. Handle explicit realignment if necessary/supported.
7825 Create in loop:
7826 vec_dest = realign_load (msq, lsq, realignment_token) */
7827 if (alignment_support_scheme == dr_explicit_realign_optimized
7828 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 7829 {
272c6793
RS
7830 lsq = gimple_assign_lhs (new_stmt);
7831 if (!realignment_token)
7832 realignment_token = dataref_ptr;
7833 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
7834 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7835 msq, lsq, realignment_token);
272c6793
RS
7836 new_temp = make_ssa_name (vec_dest, new_stmt);
7837 gimple_assign_set_lhs (new_stmt, new_temp);
7838 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7839
7840 if (alignment_support_scheme == dr_explicit_realign_optimized)
7841 {
7842 gcc_assert (phi);
7843 if (i == vec_num - 1 && j == ncopies - 1)
7844 add_phi_arg (phi, lsq,
7845 loop_latch_edge (containing_loop),
9e227d60 7846 UNKNOWN_LOCATION);
272c6793
RS
7847 msq = lsq;
7848 }
ebfd146a 7849 }
ebfd146a 7850
59fd17e3
RB
7851 /* 4. Handle invariant-load. */
7852 if (inv_p && !bb_vinfo)
7853 {
59fd17e3 7854 gcc_assert (!grouped_load);
d1417442
JJ
7855 /* If we have versioned for aliasing or the loop doesn't
7856 have any data dependencies that would preclude this,
7857 then we are sure this is a loop invariant load and
7858 thus we can insert it on the preheader edge. */
7859 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7860 && !nested_in_vect_loop
6b916b36 7861 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
7862 {
7863 if (dump_enabled_p ())
7864 {
7865 dump_printf_loc (MSG_NOTE, vect_location,
7866 "hoisting out of the vectorized "
7867 "loop: ");
7868 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 7869 }
b731b390 7870 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
7871 gsi_insert_on_edge_immediate
7872 (loop_preheader_edge (loop),
7873 gimple_build_assign (tem,
7874 unshare_expr
7875 (gimple_assign_rhs1 (stmt))));
7876 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
7877 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7878 set_vinfo_for_stmt (new_stmt,
7879 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
7880 }
7881 else
7882 {
7883 gimple_stmt_iterator gsi2 = *gsi;
7884 gsi_next (&gsi2);
7885 new_temp = vect_init_vector (stmt, scalar_dest,
7886 vectype, &gsi2);
34cd48e5 7887 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 7888 }
59fd17e3
RB
7889 }
7890
62da9e14 7891 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 7892 {
aec7ae7d
JJ
7893 tree perm_mask = perm_mask_for_reverse (vectype);
7894 new_temp = permute_vec_elements (new_temp, new_temp,
7895 perm_mask, stmt, gsi);
ebfd146a
IR
7896 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7897 }
267d3070 7898
272c6793 7899 /* Collect vector loads and later create their permutation in
0d0293ac
MM
7900 vect_transform_grouped_load (). */
7901 if (grouped_load || slp_perm)
9771b263 7902 dr_chain.quick_push (new_temp);
267d3070 7903
272c6793
RS
7904 /* Store vector loads in the corresponding SLP_NODE. */
7905 if (slp && !slp_perm)
9771b263 7906 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
7907
7908 /* With SLP permutation we load the gaps as well; without it
7909 we need to skip the gaps after we manage to fully load
7910 all elements. group_gap_adj is GROUP_SIZE here. */
7911 group_elt += nunits;
d9f21f6a
RS
7912 if (maybe_ne (group_gap_adj, 0U)
7913 && !slp_perm
7914 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 7915 {
d9f21f6a
RS
7916 poly_wide_int bump_val
7917 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7918 * group_gap_adj);
8e6cdc90 7919 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
7920 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7921 stmt, bump);
7922 group_elt = 0;
7923 }
272c6793 7924 }
9b999e8c
RB
7925 /* Bump the vector pointer to account for a gap or for excess
7926 elements loaded for a permuted SLP load. */
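/* For example (illustration only): the bump below is group_gap_adj scalar
   elements, so with group_gap_adj == 2 and 4-byte elements the data
   reference pointer is advanced by an extra 8 bytes.  */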
d9f21f6a 7927 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 7928 {
d9f21f6a
RS
7929 poly_wide_int bump_val
7930 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7931 * group_gap_adj);
8e6cdc90 7932 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
7933 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7934 stmt, bump);
7935 }
ebfd146a
IR
7936 }
7937
7938 if (slp && !slp_perm)
7939 continue;
7940
7941 if (slp_perm)
7942 {
29afecdf 7943 unsigned n_perms;
01d8bf07 7944 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
7945 slp_node_instance, false,
7946 &n_perms))
ebfd146a 7947 {
9771b263 7948 dr_chain.release ();
ebfd146a
IR
7949 return false;
7950 }
7951 }
7952 else
7953 {
0d0293ac 7954 if (grouped_load)
ebfd146a 7955 {
2de001ee 7956 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 7957 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 7958 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
7959 }
7960 else
7961 {
7962 if (j == 0)
7963 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7964 else
7965 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7966 prev_stmt_info = vinfo_for_stmt (new_stmt);
7967 }
7968 }
9771b263 7969 dr_chain.release ();
ebfd146a
IR
7970 }
7971
ebfd146a
IR
7972 return true;
7973}
7974
7975/* Function vect_is_simple_cond.
b8698a0f 7976
ebfd146a
IR
7977 Input:
7978 LOOP - the loop that is being vectorized.
7979 COND - Condition that is checked for simple use.
7980
e9e1d143
RG
7981 Output:
7982 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 7983 *DTS - The def types for the arguments of the comparison
e9e1d143 7984
ebfd146a
IR
7985 Returns whether a COND can be vectorized. Checks whether
7986 condition operands are supportable using vect_is_simple_use. */
7987
87aab9b2 7988static bool
4fc5ebf1 7989vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
7990 tree *comp_vectype, enum vect_def_type *dts,
7991 tree vectype)
ebfd146a
IR
7992{
7993 tree lhs, rhs;
e9e1d143 7994 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 7995
a414c77f
IE
7996 /* Mask case. */
7997 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 7998 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
7999 {
8000 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8001 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 8002 &dts[0], comp_vectype)
a414c77f
IE
8003 || !*comp_vectype
8004 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8005 return false;
8006 return true;
8007 }
8008
ebfd146a
IR
8009 if (!COMPARISON_CLASS_P (cond))
8010 return false;
8011
8012 lhs = TREE_OPERAND (cond, 0);
8013 rhs = TREE_OPERAND (cond, 1);
8014
8015 if (TREE_CODE (lhs) == SSA_NAME)
8016 {
355fe088 8017 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 8018 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
8019 return false;
8020 }
4fc5ebf1
JG
8021 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8022 || TREE_CODE (lhs) == FIXED_CST)
8023 dts[0] = vect_constant_def;
8024 else
ebfd146a
IR
8025 return false;
8026
8027 if (TREE_CODE (rhs) == SSA_NAME)
8028 {
355fe088 8029 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 8030 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
8031 return false;
8032 }
4fc5ebf1
JG
8033 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8034 || TREE_CODE (rhs) == FIXED_CST)
8035 dts[1] = vect_constant_def;
8036 else
ebfd146a
IR
8037 return false;
8038
28b33016 8039 if (vectype1 && vectype2
928686b1
RS
8040 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8041 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8042 return false;
8043
e9e1d143 8044 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
8045 /* Invariant comparison. */
8046 if (! *comp_vectype)
8047 {
8048 tree scalar_type = TREE_TYPE (lhs);
8049 /* If we can widen the comparison to match vectype do so. */
8050 if (INTEGRAL_TYPE_P (scalar_type)
8051 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8052 TYPE_SIZE (TREE_TYPE (vectype))))
8053 scalar_type = build_nonstandard_integer_type
8054 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8055 TYPE_UNSIGNED (scalar_type));
8056 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8057 }
8058
ebfd146a
IR
8059 return true;
8060}
8061
8062/* vectorizable_condition.
8063
b8698a0f
L
8064 Check if STMT is a conditional modify expression that can be vectorized.
8065 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8066 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8067 at GSI.
8068
8069 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8070 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
0ad23163 8071 the else clause if it is 2).
ebfd146a
IR
8072
8073 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
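/* For example (illustration only): a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a VEC_COND_EXPR whose first operand is the vectorized
   comparison a < b and whose remaining operands are the vectorized
   then/else clauses.  */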
8074
4bbe8262 8075bool
355fe088
TS
8076vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8077 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 8078 slp_tree slp_node)
ebfd146a
IR
8079{
8080 tree scalar_dest = NULL_TREE;
8081 tree vec_dest = NULL_TREE;
01216d27
JJ
8082 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8083 tree then_clause, else_clause;
ebfd146a 8084 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8085 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8086 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8087 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8088 tree vec_compare;
ebfd146a
IR
8089 tree new_temp;
8090 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8091 enum vect_def_type dts[4]
8092 = {vect_unknown_def_type, vect_unknown_def_type,
8093 vect_unknown_def_type, vect_unknown_def_type};
8094 int ndts = 4;
f7e531cf 8095 int ncopies;
01216d27 8096 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8097 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8098 int i, j;
8099 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8100 vec<tree> vec_oprnds0 = vNULL;
8101 vec<tree> vec_oprnds1 = vNULL;
8102 vec<tree> vec_oprnds2 = vNULL;
8103 vec<tree> vec_oprnds3 = vNULL;
74946978 8104 tree vec_cmp_type;
a414c77f 8105 bool masked = false;
b8698a0f 8106
f7e531cf
IR
8107 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8108 return false;
8109
af29617a
AH
8110 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
8111 {
8112 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8113 return false;
ebfd146a 8114
af29617a
AH
8115 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8116 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8117 && reduc_def))
8118 return false;
ebfd146a 8119
af29617a
AH
8120 /* FORNOW: not yet supported. */
8121 if (STMT_VINFO_LIVE_P (stmt_info))
8122 {
8123 if (dump_enabled_p ())
8124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8125 "value used after loop.\n");
8126 return false;
8127 }
ebfd146a
IR
8128 }
8129
8130 /* Is vectorizable conditional operation? */
8131 if (!is_gimple_assign (stmt))
8132 return false;
8133
8134 code = gimple_assign_rhs_code (stmt);
8135
8136 if (code != COND_EXPR)
8137 return false;
8138
465c8c19 8139 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8140 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8141
fce57248 8142 if (slp_node)
465c8c19
JJ
8143 ncopies = 1;
8144 else
e8f142e2 8145 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8146
8147 gcc_assert (ncopies >= 1);
8148 if (reduc_index && ncopies > 1)
8149 return false; /* FORNOW */
8150
4e71066d
RG
8151 cond_expr = gimple_assign_rhs1 (stmt);
8152 then_clause = gimple_assign_rhs2 (stmt);
8153 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8154
4fc5ebf1 8155 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8da4c8d8 8156 &comp_vectype, &dts[0], vectype)
e9e1d143 8157 || !comp_vectype)
ebfd146a
IR
8158 return false;
8159
81c40241 8160 gimple *def_stmt;
4fc5ebf1 8161 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8162 &vectype1))
8163 return false;
4fc5ebf1 8164 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8165 &vectype2))
ebfd146a 8166 return false;
2947d3b2
IE
8167
8168 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8169 return false;
8170
8171 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8172 return false;
8173
28b33016
IE
8174 masked = !COMPARISON_CLASS_P (cond_expr);
8175 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8176
74946978
MP
8177 if (vec_cmp_type == NULL_TREE)
8178 return false;
784fb9b3 8179
01216d27
JJ
8180 cond_code = TREE_CODE (cond_expr);
8181 if (!masked)
8182 {
8183 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8184 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8185 }
8186
8187 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8188 {
8189 /* Boolean values may have another representation in vectors
8190 and therefore we prefer bit operations over comparison for
8191 them (which also works for scalar masks). We store opcodes
8192 to use in bitop1 and bitop2. Statement is vectorized as
8193 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8194 depending on bitop1 and bitop2 arity. */
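/* Worked example (added for clarity, not part of the original source):
   for a boolean-mask condition a_1 <= b_2 the mapping below picks
   bitop1 = BIT_NOT_EXPR and bitop2 = BIT_IOR_EXPR and swaps the operands,
   so the transform phase emits roughly

     tmp_3 = ~a_1;
     cmp_4 = b_2 | tmp_3;
     x_5 = cmp_4 ? then : else;

   which matches a_1 <= b_2 on 0/1 mask values.  */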
8195 switch (cond_code)
8196 {
8197 case GT_EXPR:
8198 bitop1 = BIT_NOT_EXPR;
8199 bitop2 = BIT_AND_EXPR;
8200 break;
8201 case GE_EXPR:
8202 bitop1 = BIT_NOT_EXPR;
8203 bitop2 = BIT_IOR_EXPR;
8204 break;
8205 case LT_EXPR:
8206 bitop1 = BIT_NOT_EXPR;
8207 bitop2 = BIT_AND_EXPR;
8208 std::swap (cond_expr0, cond_expr1);
8209 break;
8210 case LE_EXPR:
8211 bitop1 = BIT_NOT_EXPR;
8212 bitop2 = BIT_IOR_EXPR;
8213 std::swap (cond_expr0, cond_expr1);
8214 break;
8215 case NE_EXPR:
8216 bitop1 = BIT_XOR_EXPR;
8217 break;
8218 case EQ_EXPR:
8219 bitop1 = BIT_XOR_EXPR;
8220 bitop2 = BIT_NOT_EXPR;
8221 break;
8222 default:
8223 return false;
8224 }
8225 cond_code = SSA_NAME;
8226 }
8227
b8698a0f 8228 if (!vec_stmt)
ebfd146a
IR
8229 {
8230 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
01216d27
JJ
8231 if (bitop1 != NOP_EXPR)
8232 {
8233 machine_mode mode = TYPE_MODE (comp_vectype);
8234 optab optab;
8235
8236 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8237 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8238 return false;
8239
8240 if (bitop2 != NOP_EXPR)
8241 {
8242 optab = optab_for_tree_code (bitop2, comp_vectype,
8243 optab_default);
8244 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8245 return false;
8246 }
8247 }
4fc5ebf1
JG
8248 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8249 cond_code))
8250 {
8251 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8252 return true;
8253 }
8254 return false;
ebfd146a
IR
8255 }
8256
f7e531cf
IR
8257 /* Transform. */
8258
8259 if (!slp_node)
8260 {
9771b263
DN
8261 vec_oprnds0.create (1);
8262 vec_oprnds1.create (1);
8263 vec_oprnds2.create (1);
8264 vec_oprnds3.create (1);
f7e531cf 8265 }
ebfd146a
IR
8266
8267 /* Handle def. */
8268 scalar_dest = gimple_assign_lhs (stmt);
8269 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8270
8271 /* Handle cond expr. */
a855b1b1
MM
8272 for (j = 0; j < ncopies; j++)
8273 {
538dd0b7 8274 gassign *new_stmt = NULL;
a855b1b1
MM
8275 if (j == 0)
8276 {
f7e531cf
IR
8277 if (slp_node)
8278 {
00f96dc9
TS
8279 auto_vec<tree, 4> ops;
8280 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8281
a414c77f 8282 if (masked)
01216d27 8283 ops.safe_push (cond_expr);
a414c77f
IE
8284 else
8285 {
01216d27
JJ
8286 ops.safe_push (cond_expr0);
8287 ops.safe_push (cond_expr1);
a414c77f 8288 }
9771b263
DN
8289 ops.safe_push (then_clause);
8290 ops.safe_push (else_clause);
306b0c92 8291 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8292 vec_oprnds3 = vec_defs.pop ();
8293 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8294 if (!masked)
8295 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8296 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8297 }
8298 else
8299 {
355fe088 8300 gimple *gtemp;
a414c77f
IE
8301 if (masked)
8302 {
8303 vec_cond_lhs
8304 = vect_get_vec_def_for_operand (cond_expr, stmt,
8305 comp_vectype);
8306 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8307 &gtemp, &dts[0]);
8308 }
8309 else
8310 {
01216d27
JJ
8311 vec_cond_lhs
8312 = vect_get_vec_def_for_operand (cond_expr0,
8313 stmt, comp_vectype);
8314 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8315
8316 vec_cond_rhs
8317 = vect_get_vec_def_for_operand (cond_expr1,
8318 stmt, comp_vectype);
8319 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8320 }
f7e531cf
IR
8321 if (reduc_index == 1)
8322 vec_then_clause = reduc_def;
8323 else
8324 {
8325 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8326 stmt);
8327 vect_is_simple_use (then_clause, loop_vinfo,
8328 &gtemp, &dts[2]);
f7e531cf
IR
8329 }
8330 if (reduc_index == 2)
8331 vec_else_clause = reduc_def;
8332 else
8333 {
8334 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8335 stmt);
8336 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8337 }
a855b1b1
MM
8338 }
8339 }
8340 else
8341 {
a414c77f
IE
8342 vec_cond_lhs
8343 = vect_get_vec_def_for_stmt_copy (dts[0],
8344 vec_oprnds0.pop ());
8345 if (!masked)
8346 vec_cond_rhs
8347 = vect_get_vec_def_for_stmt_copy (dts[1],
8348 vec_oprnds1.pop ());
8349
a855b1b1 8350 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8351 vec_oprnds2.pop ());
a855b1b1 8352 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8353 vec_oprnds3.pop ());
f7e531cf
IR
8354 }
8355
8356 if (!slp_node)
8357 {
9771b263 8358 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8359 if (!masked)
8360 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8361 vec_oprnds2.quick_push (vec_then_clause);
8362 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8363 }
8364
9dc3f7de 8365 /* Arguments are ready. Create the new vector stmt. */
9771b263 8366 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8367 {
9771b263
DN
8368 vec_then_clause = vec_oprnds2[i];
8369 vec_else_clause = vec_oprnds3[i];
a855b1b1 8370
a414c77f
IE
8371 if (masked)
8372 vec_compare = vec_cond_lhs;
8373 else
8374 {
8375 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8376 if (bitop1 == NOP_EXPR)
8377 vec_compare = build2 (cond_code, vec_cmp_type,
8378 vec_cond_lhs, vec_cond_rhs);
8379 else
8380 {
8381 new_temp = make_ssa_name (vec_cmp_type);
8382 if (bitop1 == BIT_NOT_EXPR)
8383 new_stmt = gimple_build_assign (new_temp, bitop1,
8384 vec_cond_rhs);
8385 else
8386 new_stmt
8387 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8388 vec_cond_rhs);
8389 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8390 if (bitop2 == NOP_EXPR)
8391 vec_compare = new_temp;
8392 else if (bitop2 == BIT_NOT_EXPR)
8393 {
8394 /* Instead of doing ~x ? y : z do x ? z : y. */
8395 vec_compare = new_temp;
8396 std::swap (vec_then_clause, vec_else_clause);
8397 }
8398 else
8399 {
8400 vec_compare = make_ssa_name (vec_cmp_type);
8401 new_stmt
8402 = gimple_build_assign (vec_compare, bitop2,
8403 vec_cond_lhs, new_temp);
8404 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8405 }
8406 }
a414c77f 8407 }
5958f9e2
JJ
8408 new_temp = make_ssa_name (vec_dest);
8409 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8410 vec_compare, vec_then_clause,
8411 vec_else_clause);
f7e531cf
IR
8412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8413 if (slp_node)
9771b263 8414 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
8415 }
8416
8417 if (slp_node)
8418 continue;
8419
8420 if (j == 0)
8421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8422 else
8423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8424
8425 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 8426 }
b8698a0f 8427
9771b263
DN
8428 vec_oprnds0.release ();
8429 vec_oprnds1.release ();
8430 vec_oprnds2.release ();
8431 vec_oprnds3.release ();
f7e531cf 8432
ebfd146a
IR
8433 return true;
8434}
8435
42fd8198
IE
8436/* vectorizable_comparison.
8437
8438 Check if STMT is a comparison expression that can be vectorized.
8439 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8440 comparison, put it in VEC_STMT, and insert it at GSI.
8441
8442 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
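/* For example (illustration only): a scalar statement

     mask_1 = a_2 < b_3;

   is vectorized into a vector comparison producing a boolean vector
   (mask); the statement's vectype must already be a
   VECTOR_BOOLEAN_TYPE_P type, as checked below.  */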
8443
fce57248 8444static bool
42fd8198
IE
8445vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8446 gimple **vec_stmt, tree reduc_def,
8447 slp_tree slp_node)
8448{
8449 tree lhs, rhs1, rhs2;
8450 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8451 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8452 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8453 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8454 tree new_temp;
8455 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8456 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 8457 int ndts = 2;
928686b1 8458 poly_uint64 nunits;
42fd8198 8459 int ncopies;
49e76ff1 8460 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
8461 stmt_vec_info prev_stmt_info = NULL;
8462 int i, j;
8463 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8464 vec<tree> vec_oprnds0 = vNULL;
8465 vec<tree> vec_oprnds1 = vNULL;
8466 gimple *def_stmt;
8467 tree mask_type;
8468 tree mask;
8469
c245362b
IE
8470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8471 return false;
8472
30480bcd 8473 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
8474 return false;
8475
8476 mask_type = vectype;
8477 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8478
fce57248 8479 if (slp_node)
42fd8198
IE
8480 ncopies = 1;
8481 else
e8f142e2 8482 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
8483
8484 gcc_assert (ncopies >= 1);
42fd8198
IE
8485 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8486 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8487 && reduc_def))
8488 return false;
8489
8490 if (STMT_VINFO_LIVE_P (stmt_info))
8491 {
8492 if (dump_enabled_p ())
8493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8494 "value used after loop.\n");
8495 return false;
8496 }
8497
8498 if (!is_gimple_assign (stmt))
8499 return false;
8500
8501 code = gimple_assign_rhs_code (stmt);
8502
8503 if (TREE_CODE_CLASS (code) != tcc_comparison)
8504 return false;
8505
8506 rhs1 = gimple_assign_rhs1 (stmt);
8507 rhs2 = gimple_assign_rhs2 (stmt);
8508
8509 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8510 &dts[0], &vectype1))
8511 return false;
8512
8513 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8514 &dts[1], &vectype2))
8515 return false;
8516
8517 if (vectype1 && vectype2
928686b1
RS
8518 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8519 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
8520 return false;
8521
8522 vectype = vectype1 ? vectype1 : vectype2;
8523
8524 /* Invariant comparison. */
8525 if (!vectype)
8526 {
69a9a66f 8527 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 8528 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
8529 return false;
8530 }
928686b1 8531 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
8532 return false;
8533
49e76ff1
IE
8534 /* Can't compare mask and non-mask types. */
8535 if (vectype1 && vectype2
8536 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8537 return false;
8538
8539 /* Boolean values may have another representation in vectors
8540 and therefore we prefer bit operations over comparison for
8541 them (which also works for scalar masks). We store opcodes
8542 to use in bitop1 and bitop2. Statement is vectorized as
8543 BITOP2 (rhs1 BITOP1 rhs2) or
8544 rhs1 BITOP2 (BITOP1 rhs2)
8545 depending on bitop1 and bitop2 arity. */
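/* For instance (illustration only): on boolean operands rhs1 < rhs2 is
   handled by swapping the operands (and their def types) and reusing the
   GT mapping, i.e. it is computed as rhs2 & ~rhs1.  */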
8546 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8547 {
8548 if (code == GT_EXPR)
8549 {
8550 bitop1 = BIT_NOT_EXPR;
8551 bitop2 = BIT_AND_EXPR;
8552 }
8553 else if (code == GE_EXPR)
8554 {
8555 bitop1 = BIT_NOT_EXPR;
8556 bitop2 = BIT_IOR_EXPR;
8557 }
8558 else if (code == LT_EXPR)
8559 {
8560 bitop1 = BIT_NOT_EXPR;
8561 bitop2 = BIT_AND_EXPR;
8562 std::swap (rhs1, rhs2);
264d951a 8563 std::swap (dts[0], dts[1]);
49e76ff1
IE
8564 }
8565 else if (code == LE_EXPR)
8566 {
8567 bitop1 = BIT_NOT_EXPR;
8568 bitop2 = BIT_IOR_EXPR;
8569 std::swap (rhs1, rhs2);
264d951a 8570 std::swap (dts[0], dts[1]);
49e76ff1
IE
8571 }
8572 else
8573 {
8574 bitop1 = BIT_XOR_EXPR;
8575 if (code == EQ_EXPR)
8576 bitop2 = BIT_NOT_EXPR;
8577 }
8578 }
8579
42fd8198
IE
8580 if (!vec_stmt)
8581 {
8582 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
49e76ff1 8583 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
4fc5ebf1 8584 dts, ndts, NULL, NULL);
49e76ff1 8585 if (bitop1 == NOP_EXPR)
96592eed 8586 return expand_vec_cmp_expr_p (vectype, mask_type, code);
49e76ff1
IE
8587 else
8588 {
8589 machine_mode mode = TYPE_MODE (vectype);
8590 optab optab;
8591
8592 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8593 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8594 return false;
8595
8596 if (bitop2 != NOP_EXPR)
8597 {
8598 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8599 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8600 return false;
8601 }
8602 return true;
8603 }
42fd8198
IE
8604 }
8605
8606 /* Transform. */
8607 if (!slp_node)
8608 {
8609 vec_oprnds0.create (1);
8610 vec_oprnds1.create (1);
8611 }
8612
8613 /* Handle def. */
8614 lhs = gimple_assign_lhs (stmt);
8615 mask = vect_create_destination_var (lhs, mask_type);
8616
8617 /* Handle cmp expr. */
8618 for (j = 0; j < ncopies; j++)
8619 {
8620 gassign *new_stmt = NULL;
8621 if (j == 0)
8622 {
8623 if (slp_node)
8624 {
8625 auto_vec<tree, 2> ops;
8626 auto_vec<vec<tree>, 2> vec_defs;
8627
8628 ops.safe_push (rhs1);
8629 ops.safe_push (rhs2);
306b0c92 8630 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
8631 vec_oprnds1 = vec_defs.pop ();
8632 vec_oprnds0 = vec_defs.pop ();
8633 }
8634 else
8635 {
e4af0bc4
IE
8636 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8637 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
8638 }
8639 }
8640 else
8641 {
8642 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8643 vec_oprnds0.pop ());
8644 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8645 vec_oprnds1.pop ());
8646 }
8647
8648 if (!slp_node)
8649 {
8650 vec_oprnds0.quick_push (vec_rhs1);
8651 vec_oprnds1.quick_push (vec_rhs2);
8652 }
8653
8654 /* Arguments are ready. Create the new vector stmt. */
8655 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8656 {
8657 vec_rhs2 = vec_oprnds1[i];
8658
8659 new_temp = make_ssa_name (mask);
49e76ff1
IE
8660 if (bitop1 == NOP_EXPR)
8661 {
8662 new_stmt = gimple_build_assign (new_temp, code,
8663 vec_rhs1, vec_rhs2);
8664 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8665 }
8666 else
8667 {
8668 if (bitop1 == BIT_NOT_EXPR)
8669 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8670 else
8671 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8672 vec_rhs2);
8673 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8674 if (bitop2 != NOP_EXPR)
8675 {
8676 tree res = make_ssa_name (mask);
8677 if (bitop2 == BIT_NOT_EXPR)
8678 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8679 else
8680 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8681 new_temp);
8682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8683 }
8684 }
42fd8198
IE
8685 if (slp_node)
8686 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8687 }
8688
8689 if (slp_node)
8690 continue;
8691
8692 if (j == 0)
8693 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8694 else
8695 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8696
8697 prev_stmt_info = vinfo_for_stmt (new_stmt);
8698 }
8699
8700 vec_oprnds0.release ();
8701 vec_oprnds1.release ();
8702
8703 return true;
8704}
ebfd146a 8705
68a0f2ff
RS
8706/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8707 can handle all live statements in the node. Otherwise return true
8708 if STMT is not live or if vectorizable_live_operation can handle it.
8709 GSI and VEC_STMT are as for vectorizable_live_operation. */
8710
8711static bool
8712can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8713 slp_tree slp_node, gimple **vec_stmt)
8714{
8715 if (slp_node)
8716 {
8717 gimple *slp_stmt;
8718 unsigned int i;
8719 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8720 {
8721 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8722 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8723 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8724 vec_stmt))
8725 return false;
8726 }
8727 }
8728 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8729 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8730 return false;
8731
8732 return true;
8733}
8734
8644a673 8735/* Make sure the statement is vectorizable. */
ebfd146a
IR
8736
8737bool
891ad31c
RB
8738vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8739 slp_instance node_instance)
ebfd146a 8740{
8644a673 8741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 8742 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 8743 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 8744 bool ok;
355fe088 8745 gimple *pattern_stmt;
363477c0 8746 gimple_seq pattern_def_seq;
ebfd146a 8747
73fbfcad 8748 if (dump_enabled_p ())
ebfd146a 8749 {
78c60e3d
SS
8750 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8751 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 8752 }
ebfd146a 8753
1825a1f3 8754 if (gimple_has_volatile_ops (stmt))
b8698a0f 8755 {
73fbfcad 8756 if (dump_enabled_p ())
78c60e3d 8757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8758 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
8759
8760 return false;
8761 }
b8698a0f
L
8762
8763 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
8764 to include:
8765 - the COND_EXPR which is the loop exit condition
8766 - any LABEL_EXPRs in the loop
b8698a0f 8767 - computations that are used only for array indexing or loop control.
8644a673 8768 In basic blocks we only analyze statements that are a part of some SLP
83197f37 8769 instance, therefore, all the statements are relevant.
ebfd146a 8770
d092494c 8771 Pattern statement needs to be analyzed instead of the original statement
83197f37 8772 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
8773 statements. In basic blocks we are called from some SLP instance
8774 traversal, don't analyze pattern stmts instead, the pattern stmts
8775 already will be part of SLP instance. */
83197f37
IR
8776
8777 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 8778 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 8779 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 8780 {
9d5e7640 8781 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 8782 && pattern_stmt
9d5e7640
IR
8783 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8784 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8785 {
83197f37 8786 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
8787 stmt = pattern_stmt;
8788 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 8789 if (dump_enabled_p ())
9d5e7640 8790 {
78c60e3d
SS
8791 dump_printf_loc (MSG_NOTE, vect_location,
8792 "==> examining pattern statement: ");
8793 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
8794 }
8795 }
8796 else
8797 {
73fbfcad 8798 if (dump_enabled_p ())
e645e942 8799 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 8800
9d5e7640
IR
8801 return true;
8802 }
8644a673 8803 }
83197f37 8804 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 8805 && node == NULL
83197f37
IR
8806 && pattern_stmt
8807 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8808 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8809 {
8810 /* Analyze PATTERN_STMT too. */
73fbfcad 8811 if (dump_enabled_p ())
83197f37 8812 {
78c60e3d
SS
8813 dump_printf_loc (MSG_NOTE, vect_location,
8814 "==> examining pattern statement: ");
8815 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
8816 }
8817
891ad31c
RB
8818 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8819 node_instance))
83197f37
IR
8820 return false;
8821 }
ebfd146a 8822
1107f3ae 8823 if (is_pattern_stmt_p (stmt_info)
079c527f 8824 && node == NULL
363477c0 8825 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 8826 {
363477c0 8827 gimple_stmt_iterator si;
1107f3ae 8828
363477c0
JJ
8829 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8830 {
355fe088 8831 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
8832 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8833 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8834 {
8835 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8836 if (dump_enabled_p ())
363477c0 8837 {
78c60e3d
SS
8838 dump_printf_loc (MSG_NOTE, vect_location,
8839 "==> examining pattern def statement: ");
8840 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8841 }
1107f3ae 8842
363477c0 8843 if (!vect_analyze_stmt (pattern_def_stmt,
891ad31c 8844 need_to_vectorize, node, node_instance))
363477c0
JJ
8845 return false;
8846 }
8847 }
8848 }
1107f3ae 8849
8644a673
IR
8850 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8851 {
8852 case vect_internal_def:
8853 break;
ebfd146a 8854
8644a673 8855 case vect_reduction_def:
7c5222ff 8856 case vect_nested_cycle:
14a61437
RB
8857 gcc_assert (!bb_vinfo
8858 && (relevance == vect_used_in_outer
8859 || relevance == vect_used_in_outer_by_reduction
8860 || relevance == vect_used_by_reduction
b28ead45
AH
8861 || relevance == vect_unused_in_scope
8862 || relevance == vect_used_only_live));
8644a673
IR
8863 break;
8864
8865 case vect_induction_def:
e7baeb39
RB
8866 gcc_assert (!bb_vinfo);
8867 break;
8868
8644a673
IR
8869 case vect_constant_def:
8870 case vect_external_def:
8871 case vect_unknown_def_type:
8872 default:
8873 gcc_unreachable ();
8874 }
ebfd146a 8875
8644a673 8876 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 8877 {
8644a673 8878 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
8879 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8880 || (is_gimple_call (stmt)
8881 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 8882 *need_to_vectorize = true;
ebfd146a
IR
8883 }
8884
b1af7da6
RB
8885 if (PURE_SLP_STMT (stmt_info) && !node)
8886 {
8887 dump_printf_loc (MSG_NOTE, vect_location,
8888 "handled only by SLP analysis\n");
8889 return true;
8890 }
8891
8892 ok = true;
8893 if (!bb_vinfo
8894 && (STMT_VINFO_RELEVANT_P (stmt_info)
8895 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8896 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8897 || vectorizable_conversion (stmt, NULL, NULL, node)
8898 || vectorizable_shift (stmt, NULL, NULL, node)
8899 || vectorizable_operation (stmt, NULL, NULL, node)
8900 || vectorizable_assignment (stmt, NULL, NULL, node)
8901 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8902 || vectorizable_call (stmt, NULL, NULL, node)
8903 || vectorizable_store (stmt, NULL, NULL, node)
891ad31c 8904 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
e7baeb39 8905 || vectorizable_induction (stmt, NULL, NULL, node)
42fd8198
IE
8906 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8907 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
8908 else
8909 {
8910 if (bb_vinfo)
8911 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8912 || vectorizable_conversion (stmt, NULL, NULL, node)
8913 || vectorizable_shift (stmt, NULL, NULL, node)
8914 || vectorizable_operation (stmt, NULL, NULL, node)
8915 || vectorizable_assignment (stmt, NULL, NULL, node)
8916 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8917 || vectorizable_call (stmt, NULL, NULL, node)
8918 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
8919 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8920 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 8921 }
8644a673
IR
8922
8923 if (!ok)
ebfd146a 8924 {
73fbfcad 8925 if (dump_enabled_p ())
8644a673 8926 {
78c60e3d
SS
8927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8928 "not vectorized: relevant stmt not ");
8929 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8930 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8931 }
b8698a0f 8932
ebfd146a
IR
8933 return false;
8934 }
8935
a70d6342
IR
8936 if (bb_vinfo)
8937 return true;
8938
8644a673
IR
8939 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8940 need extra handling, except for vectorizable reductions. */
68a0f2ff
RS
8941 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8942 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
ebfd146a 8943 {
73fbfcad 8944 if (dump_enabled_p ())
8644a673 8945 {
78c60e3d 8946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 8947 "not vectorized: live stmt not supported: ");
78c60e3d 8948 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8949 }
b8698a0f 8950
8644a673 8951 return false;
ebfd146a
IR
8952 }
8953
ebfd146a
IR
8954 return true;
8955}
8956
8957
8958/* Function vect_transform_stmt.
8959
8960 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8961
8962bool
355fe088 8963vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 8964 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
8965 slp_instance slp_node_instance)
8966{
8967 bool is_store = false;
355fe088 8968 gimple *vec_stmt = NULL;
ebfd146a 8969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 8970 bool done;
ebfd146a 8971
fce57248 8972 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 8973 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 8974
ebfd146a
IR
8975 switch (STMT_VINFO_TYPE (stmt_info))
8976 {
8977 case type_demotion_vec_info_type:
ebfd146a 8978 case type_promotion_vec_info_type:
ebfd146a
IR
8979 case type_conversion_vec_info_type:
8980 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8981 gcc_assert (done);
8982 break;
8983
8984 case induc_vec_info_type:
e7baeb39 8985 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
8986 gcc_assert (done);
8987 break;
8988
9dc3f7de
IR
8989 case shift_vec_info_type:
8990 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8991 gcc_assert (done);
8992 break;
8993
ebfd146a
IR
8994 case op_vec_info_type:
8995 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8996 gcc_assert (done);
8997 break;
8998
8999 case assignment_vec_info_type:
9000 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9001 gcc_assert (done);
9002 break;
9003
9004 case load_vec_info_type:
b8698a0f 9005 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
9006 slp_node_instance);
9007 gcc_assert (done);
9008 break;
9009
9010 case store_vec_info_type:
9011 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9012 gcc_assert (done);
0d0293ac 9013 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9014 {
9015 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9016 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
9017 one are skipped, and their vec_stmt_info shouldn't be freed
9018 meanwhile. */
0d0293ac 9019 *grouped_store = true;
ebfd146a
IR
9020 if (STMT_VINFO_VEC_STMT (stmt_info))
9021 is_store = true;
9022 }
9023 else
9024 is_store = true;
9025 break;
9026
9027 case condition_vec_info_type:
f7e531cf 9028 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
9029 gcc_assert (done);
9030 break;
9031
42fd8198
IE
9032 case comparison_vec_info_type:
9033 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9034 gcc_assert (done);
9035 break;
9036
ebfd146a 9037 case call_vec_info_type:
190c2236 9038 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 9039 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9040 break;
9041
0136f8f0
AH
9042 case call_simd_clone_vec_info_type:
9043 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9044 stmt = gsi_stmt (*gsi);
9045 break;
9046
ebfd146a 9047 case reduc_vec_info_type:
891ad31c
RB
9048 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9049 slp_node_instance);
ebfd146a
IR
9050 gcc_assert (done);
9051 break;
9052
9053 default:
9054 if (!STMT_VINFO_LIVE_P (stmt_info))
9055 {
73fbfcad 9056 if (dump_enabled_p ())
78c60e3d 9057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9058 "stmt not supported.\n");
ebfd146a
IR
9059 gcc_unreachable ();
9060 }
9061 }
9062
225ce44b
RB
9063 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9064 This would break hybrid SLP vectorization. */
9065 if (slp_node)
d90f8440
RB
9066 gcc_assert (!vec_stmt
9067 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9068
ebfd146a
IR
9069 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9070 is being vectorized, but outside the immediately enclosing loop. */
9071 if (vec_stmt
a70d6342
IR
9072 && STMT_VINFO_LOOP_VINFO (stmt_info)
9073 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
9074 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
9075 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9076 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9077 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9078 vect_used_in_outer_by_reduction))
ebfd146a 9079 {
a70d6342
IR
9080 struct loop *innerloop = LOOP_VINFO_LOOP (
9081 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9082 imm_use_iterator imm_iter;
9083 use_operand_p use_p;
9084 tree scalar_dest;
355fe088 9085 gimple *exit_phi;
ebfd146a 9086
73fbfcad 9087 if (dump_enabled_p ())
78c60e3d 9088 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9089 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9090
9091 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9092 (to be used when vectorizing outer-loop stmts that use the DEF of
9093 STMT). */
9094 if (gimple_code (stmt) == GIMPLE_PHI)
9095 scalar_dest = PHI_RESULT (stmt);
9096 else
9097 scalar_dest = gimple_assign_lhs (stmt);
9098
9099 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9100 {
9101 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9102 {
9103 exit_phi = USE_STMT (use_p);
9104 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9105 }
9106 }
9107 }
9108
9109 /* Handle stmts whose DEF is used outside the loop-nest that is
9110 being vectorized. */
68a0f2ff 9111 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9112 {
68a0f2ff 9113 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
ebfd146a
IR
9114 gcc_assert (done);
9115 }
9116
9117 if (vec_stmt)
83197f37 9118 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9119
b8698a0f 9120 return is_store;
ebfd146a
IR
9121}
9122
9123
b8698a0f 9124/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9125 stmt_vec_info. */
9126
9127void
355fe088 9128vect_remove_stores (gimple *first_stmt)
ebfd146a 9129{
355fe088
TS
9130 gimple *next = first_stmt;
9131 gimple *tmp;
ebfd146a
IR
9132 gimple_stmt_iterator next_si;
9133
9134 while (next)
9135 {
78048b1c
JJ
9136 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9137
9138 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9139 if (is_pattern_stmt_p (stmt_info))
9140 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9141 /* Free the attached stmt_vec_info and remove the stmt. */
9142 next_si = gsi_for_stmt (next);
3d3f2249 9143 unlink_stmt_vdef (next);
ebfd146a 9144 gsi_remove (&next_si, true);
3d3f2249 9145 release_defs (next);
ebfd146a
IR
9146 free_stmt_vec_info (next);
9147 next = tmp;
9148 }
9149}
9150
9151
9152/* Function new_stmt_vec_info.
9153
9154 Create and initialize a new stmt_vec_info struct for STMT. */
9155
9156stmt_vec_info
310213d4 9157new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9158{
9159 stmt_vec_info res;
9160 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9161
9162 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9163 STMT_VINFO_STMT (res) = stmt;
310213d4 9164 res->vinfo = vinfo;
8644a673 9165 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9166 STMT_VINFO_LIVE_P (res) = false;
9167 STMT_VINFO_VECTYPE (res) = NULL;
9168 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9169 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
9170 STMT_VINFO_IN_PATTERN_P (res) = false;
9171 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 9172 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9173 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9174 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9175 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9176
ebfd146a
IR
9177 if (gimple_code (stmt) == GIMPLE_PHI
9178 && is_loop_header_bb_p (gimple_bb (stmt)))
9179 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9180 else
8644a673
IR
9181 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9182
9771b263 9183 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9184 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9185 STMT_VINFO_NUM_SLP_USES (res) = 0;
9186
e14c1050
IR
9187 GROUP_FIRST_ELEMENT (res) = NULL;
9188 GROUP_NEXT_ELEMENT (res) = NULL;
9189 GROUP_SIZE (res) = 0;
9190 GROUP_STORE_COUNT (res) = 0;
9191 GROUP_GAP (res) = 0;
9192 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
9193
9194 return res;
9195}
9196
9197
9198/* Create a hash table for stmt_vec_info. */
9199
9200void
9201init_stmt_vec_info_vec (void)
9202{
9771b263
DN
9203 gcc_assert (!stmt_vec_info_vec.exists ());
9204 stmt_vec_info_vec.create (50);
ebfd146a
IR
9205}
9206
9207
9208/* Free hash table for stmt_vec_info. */
9209
9210void
9211free_stmt_vec_info_vec (void)
9212{
93675444 9213 unsigned int i;
3161455c 9214 stmt_vec_info info;
93675444
JJ
9215 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9216 if (info != NULL)
3161455c 9217 free_stmt_vec_info (STMT_VINFO_STMT (info));
9771b263
DN
9218 gcc_assert (stmt_vec_info_vec.exists ());
9219 stmt_vec_info_vec.release ();
ebfd146a
IR
9220}
9221
9222
9223/* Free stmt vectorization related info. */
9224
9225void
355fe088 9226free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9227{
9228 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9229
9230 if (!stmt_info)
9231 return;
9232
78048b1c
JJ
9233 /* Check if this statement has a related "pattern stmt"
9234 (introduced by the vectorizer during the pattern recognition
9235 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9236 too. */
9237 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9238 {
9239 stmt_vec_info patt_info
9240 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9241 if (patt_info)
9242 {
363477c0 9243 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 9244 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
9245 gimple_set_bb (patt_stmt, NULL);
9246 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 9247 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9248 release_ssa_name (lhs);
363477c0
JJ
9249 if (seq)
9250 {
9251 gimple_stmt_iterator si;
9252 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 9253 {
355fe088 9254 gimple *seq_stmt = gsi_stmt (si);
f0281fde 9255 gimple_set_bb (seq_stmt, NULL);
7532abf2 9256 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 9257 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
9258 release_ssa_name (lhs);
9259 free_stmt_vec_info (seq_stmt);
9260 }
363477c0 9261 }
f0281fde 9262 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
9263 }
9264 }
9265
9771b263 9266 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9267 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9268 set_vinfo_for_stmt (stmt, NULL);
9269 free (stmt_info);
9270}
9271
9272
bb67d9c7 9273/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9274
bb67d9c7 9275 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9276 by the target. */
9277
c803b2a9 9278tree
86e36728 9279get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9280{
c7d97b28 9281 tree orig_scalar_type = scalar_type;
3bd8f481 9282 scalar_mode inner_mode;
ef4bddc2 9283 machine_mode simd_mode;
86e36728 9284 poly_uint64 nunits;
ebfd146a
IR
9285 tree vectype;
9286
3bd8f481
RS
9287 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9288 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9289 return NULL_TREE;
9290
3bd8f481 9291 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9292
7b7b1813
RG
9293 /* For vector types of elements whose mode precision doesn't
9294 match their type's precision we use an element type of mode
9295 precision. The vectorization routines will have to make sure
48f2e373
RB
9296 they support the proper result truncation/extension.
9297 We also make sure to build vector types with INTEGER_TYPE
9298 component type only. */
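/* For example (illustration only): a boolean with TYPE_PRECISION 1 but
   QImode gets an 8-bit integer element type here, and enumeral types are
   likewise replaced by plain INTEGER_TYPEs of the mode's precision.  */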
6d7971b8 9299 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9300 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9301 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9302 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9303 TYPE_UNSIGNED (scalar_type));
6d7971b8 9304
ccbf5bb4
RG
9305 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9306 When the component mode passes the above test simply use a type
9307 corresponding to that mode. The theory is that any use that
9308 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9309 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9310 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9311 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9312
9313 /* We can't build a vector type of elements with alignment bigger than
9314 their size. */
dfc2e2ac 9315 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9316 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9317 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9318
dfc2e2ac
RB
9319 /* If we fell back to using the mode, fail if there was
9320 no scalar type for it. */
9321 if (scalar_type == NULL_TREE)
9322 return NULL_TREE;
9323
bb67d9c7
RG
9324 /* If no size was supplied use the mode the target prefers. Otherwise
9325 look up a vector mode of the specified size. */
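/* For example (illustration only): with a 16-byte SIZE and 4-byte integer
   elements the computation below yields nunits == 4 and looks up a vector
   mode such as V4SImode.  */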
86e36728 9326 if (known_eq (size, 0U))
bb67d9c7 9327 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
9328 else if (!multiple_p (size, nbytes, &nunits)
9329 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 9330 return NULL_TREE;
4c8fd8ac 9331 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 9332 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 9333 return NULL_TREE;
ebfd146a
IR
9334
9335 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
9336
9337 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9338 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 9339 return NULL_TREE;
ebfd146a 9340
c7d97b28
RB
9341 /* Re-attach the address-space qualifier if we canonicalized the scalar
9342 type. */
9343 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9344 return build_qualified_type
9345 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9346
ebfd146a
IR
9347 return vectype;
9348}
9349
86e36728 9350poly_uint64 current_vector_size;
bb67d9c7
RG
9351
9352/* Function get_vectype_for_scalar_type.
9353
9354 Returns the vector type corresponding to SCALAR_TYPE as supported
9355 by the target. */
9356
9357tree
9358get_vectype_for_scalar_type (tree scalar_type)
9359{
9360 tree vectype;
9361 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9362 current_vector_size);
9363 if (vectype
86e36728 9364 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
9365 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9366 return vectype;
9367}
9368
42fd8198
IE
9369/* Function get_mask_type_for_scalar_type.
9370
9371 Returns the mask type corresponding to a result of comparison
9372 of vectors of specified SCALAR_TYPE as supported by target. */
9373
9374tree
9375get_mask_type_for_scalar_type (tree scalar_type)
9376{
9377 tree vectype = get_vectype_for_scalar_type (scalar_type);
9378
9379 if (!vectype)
9380 return NULL;
9381
9382 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9383 current_vector_size);
9384}
9385
b690cc0f
RG
9386/* Function get_same_sized_vectype
9387
9388 Returns a vector type corresponding to SCALAR_TYPE of size
9389 VECTOR_TYPE if supported by the target. */
9390
9391tree
bb67d9c7 9392get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 9393{
2568d8a1 9394 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
9395 return build_same_sized_truth_vector_type (vector_type);
9396
bb67d9c7
RG
9397 return get_vectype_for_scalar_type_and_size
9398 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
9399}
9400
ebfd146a
IR
9401/* Function vect_is_simple_use.
9402
9403 Input:
81c40241
RB
9404 VINFO - the vect info of the loop or basic block that is being vectorized.
9405 OPERAND - operand in the loop or bb.
9406 Output:
9407 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9408 DT - the type of definition
ebfd146a
IR
9409
9410 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 9411 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 9412 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 9413 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
9414 is the case in reduction/induction computations).
9415 For basic blocks, supportable operands are constants and bb invariants.
9416 For now, operands defined outside the basic block are not supported. */
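/* Typical use (illustration only, mirroring the calls elsewhere in this
   file):

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
       return false;

   On success DT classifies the definition (constant, external, internal,
   induction, reduction, ...) and DEF_STMT points to the defining statement
   when OP is an SSA_NAME.  */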
ebfd146a
IR
9417
9418bool
81c40241
RB
9419vect_is_simple_use (tree operand, vec_info *vinfo,
9420 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 9421{
ebfd146a 9422 *def_stmt = NULL;
3fc356dc 9423 *dt = vect_unknown_def_type;
b8698a0f 9424
73fbfcad 9425 if (dump_enabled_p ())
ebfd146a 9426 {
78c60e3d
SS
9427 dump_printf_loc (MSG_NOTE, vect_location,
9428 "vect_is_simple_use: operand ");
9429 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 9430 dump_printf (MSG_NOTE, "\n");
ebfd146a 9431 }
b8698a0f 9432
b758f602 9433 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
9434 {
9435 *dt = vect_constant_def;
9436 return true;
9437 }
b8698a0f 9438
ebfd146a
IR
9439 if (is_gimple_min_invariant (operand))
9440 {
8644a673 9441 *dt = vect_external_def;
ebfd146a
IR
9442 return true;
9443 }
9444
ebfd146a
IR
9445 if (TREE_CODE (operand) != SSA_NAME)
9446 {
73fbfcad 9447 if (dump_enabled_p ())
af29617a
AH
9448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9449 "not ssa-name.\n");
ebfd146a
IR
9450 return false;
9451 }
b8698a0f 9452
3fc356dc 9453 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 9454 {
3fc356dc
RB
9455 *dt = vect_external_def;
9456 return true;
ebfd146a
IR
9457 }
9458
3fc356dc 9459 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 9460 if (dump_enabled_p ())
ebfd146a 9461 {
78c60e3d
SS
9462 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9463 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
9464 }
9465
61d371eb 9466 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 9467 *dt = vect_external_def;
ebfd146a
IR
9468 else
9469 {
3fc356dc 9470 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 9471 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
9472 }
9473
2e8ab70c
RB
9474 if (dump_enabled_p ())
9475 {
9476 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9477 switch (*dt)
9478 {
9479 case vect_uninitialized_def:
9480 dump_printf (MSG_NOTE, "uninitialized\n");
9481 break;
9482 case vect_constant_def:
9483 dump_printf (MSG_NOTE, "constant\n");
9484 break;
9485 case vect_external_def:
9486 dump_printf (MSG_NOTE, "external\n");
9487 break;
9488 case vect_internal_def:
9489 dump_printf (MSG_NOTE, "internal\n");
9490 break;
9491 case vect_induction_def:
9492 dump_printf (MSG_NOTE, "induction\n");
9493 break;
9494 case vect_reduction_def:
9495 dump_printf (MSG_NOTE, "reduction\n");
9496 break;
9497 case vect_double_reduction_def:
9498 dump_printf (MSG_NOTE, "double reduction\n");
9499 break;
9500 case vect_nested_cycle:
9501 dump_printf (MSG_NOTE, "nested cycle\n");
9502 break;
9503 case vect_unknown_def_type:
9504 dump_printf (MSG_NOTE, "unknown\n");
9505 break;
9506 }
9507 }
9508
81c40241 9509 if (*dt == vect_unknown_def_type)
ebfd146a 9510 {
73fbfcad 9511 if (dump_enabled_p ())
78c60e3d 9512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9513 "Unsupported pattern.\n");
ebfd146a
IR
9514 return false;
9515 }
9516
ebfd146a
IR
9517 switch (gimple_code (*def_stmt))
9518 {
9519 case GIMPLE_PHI:
ebfd146a 9520 case GIMPLE_ASSIGN:
ebfd146a 9521 case GIMPLE_CALL:
81c40241 9522 break;
ebfd146a 9523 default:
73fbfcad 9524 if (dump_enabled_p ())
78c60e3d 9525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9526 "unsupported defining stmt:\n");
ebfd146a
IR
9527 return false;
9528 }
9529
9530 return true;
9531}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best-suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
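
/* Illustrative sketch only -- not part of this file.  It shows the
   variant above that also reports the operand's vector type: for
   constant or external operands *VECTYPE comes back as NULL_TREE and
   the caller chooses a type, here simply the vector type of the use.
   The function name and parameters are assumptions for the example.  */

static bool
example_operand_vectype (tree op, vec_info *vinfo, tree use_vectype,
                         tree *op_vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, op_vectype))
    return false;
  /* Invariant operands: fall back to the vector type of the use.  */
  if (*op_vectype == NULL_TREE)
    *op_vectype = use_vectype;
  return true;
}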


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
         vect1: [res1,res2,res3,res4],
         vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
         vect1: [res1,res3,res5,res7],
         vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow the order of the
         computation in the inner-loop to change during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check on the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
                         TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type = vect_halve_mask_nunits (prev_type);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
                             TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
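
/* Illustrative sketch only -- not part of this file.  It shows how a
   caller might query supportable_widening_operation above for a two-step
   widening conversion such as char->short->int.  CONV_STMT is assumed to
   be a conversion statement that already has vectorizer info attached,
   and the two vector types are placeholders for whatever the target
   provides.  */

static void
example_query_widening (gimple *conv_stmt, tree vectype_int, tree vectype_char)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (supportable_widening_operation (NOP_EXPR, conv_stmt,
                                      vectype_int, vectype_char,
                                      &code1, &code2, &multi_step_cvt,
                                      &interm_types))
    {
      /* For char->int this would typically report one intermediate step
         through a short vector type: MULTI_STEP_CVT == 1, with that
         intermediate vector type recorded in INTERM_TYPES.  */
      gcc_assert (multi_step_cvt == (int) interm_types.length ());
    }
  interm_types.release ();
}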


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check on the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
                         TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type = vect_double_mask_nunits (prev_type);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
                             TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
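
/* Illustrative sketch only -- not part of this file.  It shows how a
   caller might query supportable_narrowing_operation above for a two-step
   narrowing conversion such as int->short->char.  The vector types are
   placeholders for whatever the target provides.  */

static void
example_query_narrowing (tree vectype_char, tree vectype_int)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (supportable_narrowing_operation (NOP_EXPR, vectype_char, vectype_int,
                                       &code1, &multi_step_cvt, &interm_types))
    {
      /* For int->char this would typically report CODE1 ==
         VEC_PACK_TRUNC_EXPR with MULTI_STEP_CVT == 1 and the short
         vector type recorded in INTERM_TYPES.  */
      gcc_assert (multi_step_cvt == (int) interm_types.length ());
    }
  interm_types.release ();
}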

/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
                                                       cmp_type, mask_type,
                                                       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
                                            start_index, end_index,
                                            build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
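
/* Illustrative sketch only -- not part of this file.  A scalar model of
   the mask built by vect_gen_while above: because the comparison is
   monotonic in the lane index, lane I is true exactly when
   START_INDEX + I < END_INDEX.  The function name and plain C types are
   assumptions made for the example.  */

static void
example_while_ult_model (unsigned long start_index, unsigned long end_index,
                         bool *mask, unsigned int nlanes)
{
  for (unsigned int i = 0; i < nlanes; i++)
    mask[i] = start_index + i < end_index;
}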

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
                    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}