/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
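  /* Note: loads and stores that belong to a gather/scatter statement
     are recorded under their own cost kinds, so the target cost model
     can distinguish them from ordinary contiguous accesses.  */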
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		       tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
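	  /* The offset of an internal gather/scatter call (argument 1)
	     is itself vectorized, so treat it as a genuine use rather
	     than plain address computation.  */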
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  def_bb = gimple_bb (dstmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     DSTMT_VINFO must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
     check that everything is as expected, and we are done.  */
  bb = gimple_bb (stmt_vinfo->stmt);
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
		      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
			 == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
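	      /* The condition of a COND_EXPR can be an embedded
		 comparison tree rather than a separate gimple operand,
		 so its two operands have to be walked explicitly.  */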
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    return res;
	}
    } /* while worklist */

  return opt_result::success ();
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ??? We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
				 opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ??? This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
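      /* Both operations are emitted for every vector, doubling the
	 statement count, and their results are combined with vec_perm
	 statements.  */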
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
933 (i + 1) : i;
68435eb2
RB
934 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
935 vec_promote_demote, stmt_info, 0,
936 vect_body);
8bd37302
BS
937 }
938
939 /* FORNOW: Assuming maximum 2 args per stmts. */
940 for (i = 0; i < 2; i++)
92345349 941 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
68435eb2
RB
942 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
943 stmt_info, 0, vect_prologue);
8bd37302 944
73fbfcad 945 if (dump_enabled_p ())
78c60e3d
SS
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
e645e942 948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
8bd37302
BS
949}
950
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
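      /* For example, interleaving a group of size 4 takes
	 ceil_log2 (4) * 4 = 8 permute statements per copy.  */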
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;
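	/* VECT_MAX_COST is effectively infinite here, so costing makes
	   the unsupported access unprofitable to vectorize.  */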

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
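      /* Count one load per vector-sized chunk of the group that
	 contains at least one element the permutation actually uses.  */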
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (DR_GROUP_SIZE (first_stmt_info)
		      - DR_GROUP_GAP (first_stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  int alignment_support_scheme
    = vect_supportable_dr_alignment (dr_info, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr_info),
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
		    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt_vinfo))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
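	      /* For non-integral scalars, reinterpret the bits rather
		 than converting the value; only integral values get a
		 meaningful NOP conversion to the element type here.  */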
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt_info, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt_info, init_stmt, gsi);
  return new_temp;
}

c83a894c 1444/* Function vect_get_vec_def_for_operand_1.
a70d6342 1445
32e8e429
RS
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
ebfd146a
IR
1448
1449tree
32e8e429
RS
1450vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1451 enum vect_def_type dt)
ebfd146a
IR
1452{
1453 tree vec_oprnd;
1eede195 1454 stmt_vec_info vec_stmt_info;
ebfd146a
IR
1455
1456 switch (dt)
1457 {
81c40241 1458 /* operand is a constant or a loop invariant. */
ebfd146a 1459 case vect_constant_def:
81c40241 1460 case vect_external_def:
c83a894c
AH
1461 /* Code should use vect_get_vec_def_for_operand. */
1462 gcc_unreachable ();
ebfd146a 1463
81c40241 1464 /* operand is defined inside the loop. */
8644a673 1465 case vect_internal_def:
ebfd146a 1466 {
ebfd146a 1467 /* Get the def from the vectorized stmt. */
1eede195
RS
1468 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1469 /* Get vectorized pattern statement. */
1470 if (!vec_stmt_info
1471 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1472 && !STMT_VINFO_RELEVANT (def_stmt_info))
1473 vec_stmt_info = (STMT_VINFO_VEC_STMT
1474 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1475 gcc_assert (vec_stmt_info);
1476 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1477 vec_oprnd = PHI_RESULT (phi);
ebfd146a 1478 else
1eede195
RS
1479 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1480 return vec_oprnd;
ebfd146a
IR
1481 }
1482
c78e3652 1483 /* operand is defined by a loop header phi. */
ebfd146a 1484 case vect_reduction_def:
06066f92 1485 case vect_double_reduction_def:
7c5222ff 1486 case vect_nested_cycle:
ebfd146a
IR
1487 case vect_induction_def:
1488 {
32e8e429 1489 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI);
ebfd146a 1490
1eede195 1491 /* Get the def from the vectorized stmt. */
1eede195
RS
1492 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1493 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1494 vec_oprnd = PHI_RESULT (phi);
6dbbece6 1495 else
1eede195
RS
1496 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1497 return vec_oprnd;
ebfd146a
IR
1498 }
1499
1500 default:
1501 gcc_unreachable ();
1502 }
1503}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a (vector) def
   that will be used in the vectorized stmt for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   the vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_def_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = %G", def_stmt);

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.  VINFO describes the vectorization.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0    VS1.1
			VS1.1:  vx.1 = memref1    VS1.2
			VS1.2:  vx.2 = memref2    VS1.3
			VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
			VSnew.1:  vz1 = vx.1 + ...  VSnew.2
			VSnew.2:  vz2 = vx.2 + ...  VSnew.3
			VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
  if (!def_stmt_info)
    /* Do nothing; can reuse same def.  */
    return vec_oprnd;

  def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (def_stmt_info);
  if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
    vec_oprnd = PHI_RESULT (phi);
  else
    vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static stmt_vec_info
vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  vec_info *vinfo = stmt_info->vinfo;

  stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
  if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);

  return vec_stmt_info;
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
  gsi_replace (&gsi, vec_stmt, false);

  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}

/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

stmt_vec_info
vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
}
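
/* For example (a sketch, with illustrative SSA names): if *GSI points
   at a scalar store

       MEM[p_3] = x_1;   # vuse .MEM_4, vdef .MEM_5

   and VEC_STMT is a vector store inserted before it, the code above
   gives VEC_STMT the vuse .MEM_4 and a fresh vdef, and rewires the
   scalar store's vuse to that new vdef, keeping virtual SSA form
   valid without running the SSA renamer.  */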

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}
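
/* For example (a sketch): a call to sqrtf has combined function
   CFN_BUILT_IN_SQRTF, whose associated internal function is IFN_SQRT.
   With VECTYPE_OUT and VECTYPE_IN both V4SF,
   direct_internal_fn_supported_p asks whether the target implements
   the corresponding vector sqrt optab for V4SFmode, in which case the
   call can be vectorized as IFN_SQRT.  */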


static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);

/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a fully-masked loop.  This is testing
   whether the vectorizer pass has the appropriate support, as well as
   whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.

   Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
   supported, otherwise record the required mask types.  */

static void
check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
			  vec_load_store_type vls_type, int group_size,
			  vect_memory_access_type memory_access_type,
			  gather_scatter_info *gs_info)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
	  ? !vect_load_lanes_supported (vectype, group_size, true)
	  : !vect_store_lanes_supported (vectype, group_size, true))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because the"
			     " target doesn't have an appropriate masked"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      tree offset_type = TREE_TYPE (gs_info->offset);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
						   gs_info->memory_type,
						   TYPE_SIGN (offset_type),
						   gs_info->scale))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't use a fully-masked loop because the"
			     " target doesn't have an appropriate masked"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
	  return;
	}
      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because an access"
			 " isn't contiguous.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }

  machine_mode mask_mode;
  if (!(targetm.vectorize.get_mask_mode
	(GET_MODE_NUNITS (vecmode),
	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
      || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't use a fully-masked loop because the target"
			 " doesn't have the appropriate masked load or"
			 " store.\n");
      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
      return;
    }
  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned int nvectors;
  if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
  else
    gcc_unreachable ();
}
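
/* For example (a sketch): with GROUP_SIZE = 3, a vectorization factor
   of 8 and V4SI vectors, the group touches 3 * 8 = 24 elements per
   vector iteration, so can_div_away_from_zero_p computes
   nvectors = 24 / 4 = 6 and six loop masks are recorded for
   VECTYPE.  */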

/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
			 gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);
  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
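
/* For example (a sketch, with illustrative SSA names): given
   VEC_MASK = mask_5 and LOOP_MASK = loop_mask_6, this emits

       vec_mask_and_7 = mask_5 & loop_mask_6;

   before GSI and returns vec_mask_and_7, so that lanes switched off
   by the fully-masked loop stay inactive as well.  */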

/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* See whether we can calculate COUNT * STEP / SCALE
	 in ELEMENT_BITS bits.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      if (wi::min_precision (range, sign) > element_bits)
	{
	  overflow = wi::OVF_UNKNOWN;
	  continue;
	}

      /* See whether the target supports the operation.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
				     memory_type, element_bits, sign, scale,
				     &gs_info->ifn, &gs_info->element_type))
	continue;

      tree offset_type = build_nonstandard_integer_type (element_bits,
							 sign == UNSIGNED);

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->offset_vectype = NULL_TREE;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
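
/* For example (a sketch): for a strided access with DR_STEP = 20 and
   32-bit elements, SCALE = 4 gives X = 20 / 4 = 5, so the vectorized
   loop would use the offset vector { 0, 5, 10, 15, ... } with a scale
   of 4, addressing bytes { 0, 20, 40, 60, ... } from the base.  The
   truncation is only valid if COUNT * 5 still fits in 32 bits.  */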

/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if the load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
				    loop_vec_info loop_vinfo, bool masked_p,
				    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->decl)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
						masked_p, gs_info);

  scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int offset_bits = TYPE_PRECISION (offset_type);

  /* Enforced by vect_check_gather_scatter.  */
  gcc_assert (element_bits >= offset_bits);

  /* If the elements are wider than the offset, convert the offset to the
     same width, without changing its sign.  */
  if (element_bits > offset_bits)
    {
      bool unsigned_p = TYPE_UNSIGNED (offset_type);
      offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
      gs_info->offset = fold_convert (offset_type, gs_info->offset);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using gather/scatter for strided/grouped access,"
		     " scale = %d\n", gs_info->scale);

  return true;
}

/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
			       size_zero_node);
}

/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}

/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
2de001ee 2130/* A subroutine of get_load_store_type, with a subset of the same
32e8e429 2131 arguments. Handle the case where STMT_INFO is part of a grouped load
2de001ee
RS
2132 or store.
2133
2134 For stores, the statements in the group are all consecutive
2135 and there is no gap at the end. For loads, the statements in the
2136 group might not be consecutive; there can be gaps between statements
2137 as well as at the end. */
2138
2139static bool
32e8e429 2140get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
7e11fc7f 2141 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2142 vect_memory_access_type *memory_access_type,
2143 gather_scatter_info *gs_info)
2de001ee 2144{
2de001ee
RS
2145 vec_info *vinfo = stmt_info->vinfo;
2146 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2147 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
bffb8014 2148 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
89fa689a 2149 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
bffb8014
RS
2150 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2151 bool single_element_p = (stmt_info == first_stmt_info
2c53b149 2152 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
bffb8014 2153 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
928686b1 2154 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2155
2156 /* True if the vectorized statements would access beyond the last
2157 statement in the group. */
2158 bool overrun_p = false;
2159
2160 /* True if we can cope with such overrun by peeling for gaps, so that
2161 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2162 bool can_overrun_p = (!masked_p
2163 && vls_type == VLS_LOAD
2164 && loop_vinfo
2165 && !loop->inner);
2de001ee
RS
2166
2167 /* There can only be a gap at the end of the group if the stride is
2168 known at compile time. */
3ad3b3ac 2169 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2de001ee
RS
2170
2171 /* Stores can't yet have gaps. */
2172 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2173
2174 if (slp)
2175 {
3ad3b3ac 2176 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2de001ee 2177 {
2c53b149 2178 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2179 separated by the stride, until we have a complete vector.
2180 Fall back to scalar accesses if that isn't possible. */
928686b1 2181 if (multiple_p (nunits, group_size))
2de001ee
RS
2182 *memory_access_type = VMAT_STRIDED_SLP;
2183 else
2184 *memory_access_type = VMAT_ELEMENTWISE;
2185 }
2186 else
2187 {
2188 overrun_p = loop_vinfo && gap != 0;
2189 if (overrun_p && vls_type != VLS_LOAD)
2190 {
2191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2192 "Grouped store with gaps requires"
2193 " non-consecutive accesses\n");
2194 return false;
2195 }
f702e7d4
RS
2196 /* An overrun is fine if the trailing elements are smaller
2197 than the alignment boundary B. Every vector access will
2198 be a multiple of B and so we are guaranteed to access a
2199 non-gap element in the same B-sized block. */
f9ef2c76 2200 if (overrun_p
89fa689a
RS
2201 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2202 / vect_get_scalar_dr_size (first_dr_info)))
f9ef2c76 2203 overrun_p = false;
2de001ee
RS
2204 if (overrun_p && !can_overrun_p)
2205 {
2206 if (dump_enabled_p ())
2207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2208 "Peeling for outer loop is not supported\n");
2209 return false;
2210 }
2211 *memory_access_type = VMAT_CONTIGUOUS;
2212 }
2213 }
2214 else
2215 {
2216 /* We can always handle this case using elementwise accesses,
2217 but see if something more efficient is available. */
2218 *memory_access_type = VMAT_ELEMENTWISE;
2219
2220 /* If there is a gap at the end of the group then these optimizations
2221 would access excess elements in the last iteration. */
2222 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2223 /* An overrun is fine if the trailing elements are smaller than the
2224 alignment boundary B. Every vector access will be a multiple of B
2225 and so we are guaranteed to access a non-gap element in the
2226 same B-sized block. */
f9ef2c76 2227 if (would_overrun_p
7e11fc7f 2228 && !masked_p
89fa689a
RS
2229 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2230 / vect_get_scalar_dr_size (first_dr_info)))
f9ef2c76 2231 would_overrun_p = false;
f702e7d4 2232
3ad3b3ac 2233 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
62da9e14 2234 && (can_overrun_p || !would_overrun_p)
86a91c0a 2235 && compare_step_with_zero (stmt_info) > 0)
2de001ee 2236 {
6737facb
RS
2237 /* First cope with the degenerate case of a single-element
2238 vector. */
2239 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2240 *memory_access_type = VMAT_CONTIGUOUS;
2241
2242 /* Otherwise try using LOAD/STORE_LANES. */
2243 if (*memory_access_type == VMAT_ELEMENTWISE
2244 && (vls_type == VLS_LOAD
7e11fc7f
RS
2245 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2246 : vect_store_lanes_supported (vectype, group_size,
2247 masked_p)))
2de001ee
RS
2248 {
2249 *memory_access_type = VMAT_LOAD_STORE_LANES;
2250 overrun_p = would_overrun_p;
2251 }
2252
2253 /* If that fails, try using permuting loads. */
2254 if (*memory_access_type == VMAT_ELEMENTWISE
2255 && (vls_type == VLS_LOAD
2256 ? vect_grouped_load_supported (vectype, single_element_p,
2257 group_size)
2258 : vect_grouped_store_supported (vectype, group_size)))
2259 {
2260 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2261 overrun_p = would_overrun_p;
2262 }
2263 }
429ef523
RS
2264
2265 /* As a last resort, trying using a gather load or scatter store.
2266
2267 ??? Although the code can handle all group sizes correctly,
2268 it probably isn't a win to use separate strided accesses based
2269 on nearby locations. Or, even if it's a win over scalar code,
2270 it might not be a win over vectorizing at a lower VF, if that
2271 allows us to use contiguous accesses. */
2272 if (*memory_access_type == VMAT_ELEMENTWISE
2273 && single_element_p
2274 && loop_vinfo
86a91c0a 2275 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
429ef523
RS
2276 masked_p, gs_info))
2277 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2278 }
2279
bffb8014 2280 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2de001ee
RS
2281 {
2282 /* STMT is the leader of the group. Check the operands of all the
2283 stmts of the group. */
bffb8014
RS
2284 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2285 while (next_stmt_info)
2de001ee 2286 {
bffb8014 2287 tree op = vect_get_store_rhs (next_stmt_info);
2de001ee 2288 enum vect_def_type dt;
894dd753 2289 if (!vect_is_simple_use (op, vinfo, &dt))
2de001ee
RS
2290 {
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2293 "use not simple.\n");
2294 return false;
2295 }
bffb8014 2296 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2de001ee
RS
2297 }
2298 }
2299
2300 if (overrun_p)
2301 {
2302 gcc_assert (can_overrun_p);
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2305 "Data access with gaps requires scalar "
2306 "epilogue loop\n");
2307 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2308 }
2309
2310 return true;
2311}
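
/* For example (a sketch): a load group with DR_GROUP_SIZE = 4 and
   DR_GROUP_GAP = 1 reads only three of every four consecutive
   elements.  A contiguous vector load would also read the gap
   element, so the last vector iteration may touch memory past the
   final scalar access; that is only allowed when can_overrun_p holds,
   in which case LOOP_VINFO_PEELING_FOR_GAPS requests a scalar
   epilogue that handles the final iterations safely.  */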
2312
62da9e14 2313/* A subroutine of get_load_store_type, with a subset of the same
32e8e429 2314 arguments. Handle the case where STMT_INFO is a load or store that
62da9e14
RS
2315 accesses consecutive elements with a negative step. */
2316
2317static vect_memory_access_type
32e8e429 2318get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
62da9e14
RS
2319 vec_load_store_type vls_type,
2320 unsigned int ncopies)
2321{
89fa689a 2322 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
62da9e14
RS
2323 dr_alignment_support alignment_support_scheme;
2324
2325 if (ncopies > 1)
2326 {
2327 if (dump_enabled_p ())
2328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2329 "multiple types with negative step.\n");
2330 return VMAT_ELEMENTWISE;
2331 }
2332
89fa689a 2333 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
62da9e14
RS
2334 if (alignment_support_scheme != dr_aligned
2335 && alignment_support_scheme != dr_unaligned_supported)
2336 {
2337 if (dump_enabled_p ())
2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2339 "negative step but alignment required.\n");
2340 return VMAT_ELEMENTWISE;
2341 }
2342
2343 if (vls_type == VLS_STORE_INVARIANT)
2344 {
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_NOTE, vect_location,
2347 "negative step with invariant source;"
2348 " no permute needed.\n");
2349 return VMAT_CONTIGUOUS_DOWN;
2350 }
2351
2352 if (!perm_mask_for_reverse (vectype))
2353 {
2354 if (dump_enabled_p ())
2355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2356 "negative step and reversing not supported.\n");
2357 return VMAT_ELEMENTWISE;
2358 }
2359
2360 return VMAT_CONTIGUOUS_REVERSE;
2361}
2362
32e8e429 2363/* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2de001ee
RS
2364 if there is a memory access type that the vectorized form can use,
2365 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2366 or scatters, fill in GS_INFO accordingly.
2367
2368 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2369 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2370 VECTYPE is the vector type that the vectorized statements will use.
2371 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2372
2373static bool
32e8e429
RS
2374get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2375 bool masked_p, vec_load_store_type vls_type,
2376 unsigned int ncopies,
2de001ee
RS
2377 vect_memory_access_type *memory_access_type,
2378 gather_scatter_info *gs_info)
2379{
2de001ee
RS
2380 vec_info *vinfo = stmt_info->vinfo;
2381 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2382 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2383 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2384 {
2385 *memory_access_type = VMAT_GATHER_SCATTER;
86a91c0a 2386 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2de001ee 2387 gcc_unreachable ();
894dd753 2388 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2de001ee
RS
2389 &gs_info->offset_dt,
2390 &gs_info->offset_vectype))
2391 {
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "%s index use not simple.\n",
2395 vls_type == VLS_LOAD ? "gather" : "scatter");
2396 return false;
2397 }
2398 }
2399 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2400 {
86a91c0a
RS
2401 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2402 vls_type, memory_access_type, gs_info))
2de001ee
RS
2403 return false;
2404 }
2405 else if (STMT_VINFO_STRIDED_P (stmt_info))
2406 {
2407 gcc_assert (!slp);
ab2fc782 2408 if (loop_vinfo
86a91c0a 2409 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
429ef523 2410 masked_p, gs_info))
ab2fc782
RS
2411 *memory_access_type = VMAT_GATHER_SCATTER;
2412 else
2413 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2414 }
2415 else
62da9e14 2416 {
86a91c0a 2417 int cmp = compare_step_with_zero (stmt_info);
62da9e14
RS
2418 if (cmp < 0)
2419 *memory_access_type = get_negative_load_store_type
86a91c0a 2420 (stmt_info, vectype, vls_type, ncopies);
62da9e14
RS
2421 else if (cmp == 0)
2422 {
2423 gcc_assert (vls_type == VLS_LOAD);
2424 *memory_access_type = VMAT_INVARIANT;
2425 }
2426 else
2427 *memory_access_type = VMAT_CONTIGUOUS;
2428 }
2de001ee 2429
4d694b27
RS
2430 if ((*memory_access_type == VMAT_ELEMENTWISE
2431 || *memory_access_type == VMAT_STRIDED_SLP)
2432 && !nunits.is_constant ())
2433 {
2434 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2436 "Not using elementwise accesses due to variable "
2437 "vectorization factor.\n");
2438 return false;
2439 }
2440
2de001ee
RS
2441 /* FIXME: At the moment the cost model seems to underestimate the
2442 cost of using elementwise accesses. This check preserves the
2443 traditional behavior until that can be fixed. */
3ad3b3ac
RS
2444 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2445 if (!first_stmt_info)
2446 first_stmt_info = stmt_info;
2de001ee 2447 if (*memory_access_type == VMAT_ELEMENTWISE
3ad3b3ac 2448 && !STMT_VINFO_STRIDED_P (first_stmt_info)
bffb8014 2449 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2c53b149
RB
2450 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2451 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2452 {
2453 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2455 "not falling back to elementwise accesses\n");
2456 return false;
2457 }
2458 return true;
2459}
2460
aaeefd88 2461/* Return true if boolean argument MASK is suitable for vectorizing
32e8e429 2462 conditional load or store STMT_INFO. When returning true, store the type
929b4411
RS
2463 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2464 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2465
2466static bool
32e8e429 2467vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
929b4411
RS
2468 vect_def_type *mask_dt_out,
2469 tree *mask_vectype_out)
aaeefd88
RS
2470{
2471 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2472 {
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "mask argument is not a boolean.\n");
2476 return false;
2477 }
2478
2479 if (TREE_CODE (mask) != SSA_NAME)
2480 {
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 "mask argument is not an SSA name.\n");
2484 return false;
2485 }
2486
929b4411 2487 enum vect_def_type mask_dt;
aaeefd88 2488 tree mask_vectype;
894dd753 2489 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
aaeefd88
RS
2490 {
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "mask use not simple.\n");
2494 return false;
2495 }
2496
2497 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2498 if (!mask_vectype)
2499 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2500
2501 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2502 {
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "could not find an appropriate vector mask type.\n");
2506 return false;
2507 }
2508
2509 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2510 TYPE_VECTOR_SUBPARTS (vectype)))
2511 {
2512 if (dump_enabled_p ())
3c2a8ed0
DM
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2514 "vector mask type %T",
2515 " does not match vector data type %T.\n",
2516 mask_vectype, vectype);
2517
aaeefd88
RS
2518 return false;
2519 }
2520
929b4411 2521 *mask_dt_out = mask_dt;
aaeefd88
RS
2522 *mask_vectype_out = mask_vectype;
2523 return true;
2524}
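
/* For example (a sketch): for V4SI data, the mask must be a 4-element
   boolean vector - a predicate mode on targets with mask registers,
   or something like a 4 x 32-bit integer boolean vector otherwise.
   A mask whose vectorized form has a different element count is
   rejected by the maybe_ne check above.  */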

/* Return true if stored value RHS is suitable for vectorizing store
   statement STMT_INFO.  When returning true, store the type of the
   definition in *RHS_DT_OUT, the type of the vectorized store value in
   *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */

static bool
vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
		      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
		      vec_load_store_type *vls_type_out)
{
  /* In the case this is a store from a constant, make sure
     native_encode_expr can handle it.  */
  if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot encode constant as a byte sequence.\n");
      return false;
    }

  enum vect_def_type rhs_dt;
  tree rhs_vectype;
  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "incompatible vector types.\n");
      return false;
    }

  *rhs_dt_out = rhs_dt;
  *rhs_vectype_out = rhs_vectype;
  if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
    *vls_type_out = VLS_STORE_INVARIANT;
  else
    *vls_type_out = VLS_STORE;
  return true;
}

/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
   Note that we support masks with floating-point type, in which case the
   floats are interpreted as a bitmask.  */

static tree
vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
{
  if (TREE_CODE (masktype) == INTEGER_TYPE)
    return build_int_cst (masktype, -1);
  else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
    {
      tree mask = build_int_cst (TREE_TYPE (masktype), -1);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = -1;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
      tree mask = build_real (TREE_TYPE (masktype), r);
      mask = build_vector_from_val (masktype, mask);
      return vect_init_vector (stmt_info, mask, masktype, NULL);
    }
  gcc_unreachable ();
}
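
/* For example (a sketch): some gather built-ins take their mask as a
   vector of floats, e.g. V4SF.  Each lane of the all-ones mask is then
   built by reinterpreting the all-ones bit pattern via
   real_from_target, so it is the bits - not the numeric value - that
   the instruction consumes.  */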

/* Build an all-zero merge value of type VECTYPE while vectorizing
   STMT_INFO as a gather load.  */

static tree
vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
{
  tree merge;
  if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
    merge = build_int_cst (TREE_TYPE (vectype), 0);
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
    {
      REAL_VALUE_TYPE r;
      long tmp[6];
      for (int j = 0; j < 6; ++j)
	tmp[j] = 0;
      real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
      merge = build_real (TREE_TYPE (vectype), r);
    }
  else
    gcc_unreachable ();
  merge = build_vector_from_val (vectype, merge);
  return vect_init_vector (stmt_info, merge, vectype, NULL);
}

/* Build a gather load call while vectorizing STMT_INFO.  Insert new
   instructions before GSI and add them to VEC_STMT.  GS_INFO describes
   the gather load operation.  If the load is conditional, MASK is the
   unvectorized condition, otherwise MASK is null.  */

static void
vect_build_gather_load_calls (stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      stmt_vec_info *vec_stmt,
			      gather_scatter_info *gs_info,
			      tree mask)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = vect_get_num_copies (loop_vinfo, vectype);
  edge pe = loop_preheader_edge (loop);
  enum { NARROW, NONE, WIDEN } modifier;
  poly_uint64 gather_off_nunits
    = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);

  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
  tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
  tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
  tree scaletype = TREE_VALUE (arglist);
  gcc_checking_assert (types_compatible_p (srctype, rettype)
		       && (!mask || types_compatible_p (srctype, masktype)));

  tree perm_mask = NULL_TREE;
  tree mask_perm_mask = NULL_TREE;
  if (known_eq (nunits, gather_off_nunits))
    modifier = NONE;
  else if (known_eq (nunits * 2, gather_off_nunits))
    {
      modifier = WIDEN;

      /* Currently widening gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = gather_off_nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      for (int i = 0; i < count; ++i)
	sel.quick_push (i | (count / 2));

      vec_perm_indices indices (sel, 1, count);
      perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
					      indices);
    }
  else if (known_eq (nunits, gather_off_nunits * 2))
    {
      modifier = NARROW;

      /* Currently narrowing gathers and scatters are only supported for
	 fixed-length vectors.  */
      int count = nunits.to_constant ();
      vec_perm_builder sel (count, count, 1);
      sel.quick_grow (count);
      for (int i = 0; i < count; ++i)
	sel[i] = i < count / 2 ? i : i + count / 2;
      vec_perm_indices indices (sel, 2, count);
      perm_mask = vect_gen_perm_mask_checked (vectype, indices);

      ncopies *= 2;

      if (mask)
	{
	  for (int i = 0; i < count; ++i)
	    sel[i] = i | (count / 2);
	  indices.new_vector (sel, 2, count);
	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
	}
    }
  else
    gcc_unreachable ();

  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);

  tree ptr = fold_convert (ptrtype, gs_info->base);
  if (!is_gimple_min_invariant (ptr))
    {
      gimple_seq seq;
      ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }

  tree scale = build_int_cst (scaletype, gs_info->scale);

  tree vec_oprnd0 = NULL_TREE;
  tree vec_mask = NULL_TREE;
  tree src_op = NULL_TREE;
  tree mask_op = NULL_TREE;
  tree prev_res = NULL_TREE;
  stmt_vec_info prev_stmt_info = NULL;

  if (!mask)
    {
      src_op = vect_build_zero_merge_argument (stmt_info, rettype);
      mask_op = vect_build_all_ones_mask (stmt_info, masktype);
    }

  for (int j = 0; j < ncopies; ++j)
    {
      tree op, var;
      if (modifier == WIDEN && (j & 1))
	op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
				   perm_mask, stmt_info, gsi);
      else if (j == 0)
	op = vec_oprnd0
	  = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
      else
	op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
							  vec_oprnd0);

      if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				TYPE_VECTOR_SUBPARTS (idxtype)));
	  var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	  op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  op = var;
	}

      if (mask)
	{
	  if (mask_perm_mask && (j & 1))
	    mask_op = permute_vec_elements (mask_op, mask_op,
					    mask_perm_mask, stmt_info, gsi);
	  else
	    {
	      if (j == 0)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
	      else
		vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
							   vec_mask);

	      mask_op = vec_mask;
	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
		{
		  gcc_assert
		    (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
			       TYPE_VECTOR_SUBPARTS (masktype)));
		  var = vect_get_new_ssa_name (masktype, vect_simple_var);
		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
		  gassign *new_stmt
		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  mask_op = var;
		}
	    }
	  src_op = mask_op;
	}

      gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
					   mask_op, scale);

      stmt_vec_info new_stmt_info;
      if (!useless_type_conversion_p (vectype, rettype))
	{
	  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
				TYPE_VECTOR_SUBPARTS (rettype)));
	  op = vect_get_new_ssa_name (rettype, vect_simple_var);
	  gimple_call_set_lhs (new_call, op);
	  vect_finish_stmt_generation (stmt_info, new_call, gsi);
	  var = make_ssa_name (vec_dest);
	  op = build1 (VIEW_CONVERT_EXPR, vectype, op);
	  gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	}
      else
	{
	  var = make_ssa_name (vec_dest, new_call);
	  gimple_call_set_lhs (new_call, var);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_call, gsi);
	}

      if (modifier == NARROW)
	{
	  if ((j & 1) == 0)
	    {
	      prev_res = var;
	      continue;
	    }
	  var = permute_vec_elements (prev_res, var, perm_mask,
				      stmt_info, gsi);
	  new_stmt_info = loop_vinfo->lookup_def (var);
	}

      if (prev_stmt_info == NULL)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
      prev_stmt_info = new_stmt_info;
    }
}

/* Prepare the base and offset in GS_INFO for vectorization.
   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
   to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
			     gather_scatter_info *gs_info,
			     tree *dataref_ptr, tree *vec_offset)
{
  gimple_seq stmts = NULL;
  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
  if (stmts != NULL)
    {
      basic_block new_bb;
      edge pe = loop_preheader_edge (loop);
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
					      offset_vectype);
}

/* Prepare to implement a grouped or strided load or store using
   the gather load or scatter store operation described by GS_INFO.
   STMT_INFO is the load or store statement.

   Set *DATAREF_BUMP to the amount that should be added to the base
   address after each copy of the vectorized statement.  Set *VEC_OFFSET
   to an invariant offset vector in which element I has the value
   I * DR_STEP / SCALE.  */

static void
vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
				 loop_vec_info loop_vinfo,
				 gather_scatter_info *gs_info,
				 tree *dataref_bump, tree *vec_offset)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple_seq stmts;

  tree bump = size_binop (MULT_EXPR,
			  fold_convert (sizetype, DR_STEP (dr)),
			  size_int (TYPE_VECTOR_SUBPARTS (vectype)));
  *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* The offset given in GS_INFO can have pointer type, so use the element
     type of the vector instead.  */
  tree offset_type = TREE_TYPE (gs_info->offset);
  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
  offset_type = TREE_TYPE (offset_vectype);

  /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
  tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
			  ssize_int (gs_info->scale));
  step = fold_convert (offset_type, step);
  step = force_gimple_operand (step, &stmts, true, NULL_TREE);

  /* Create {0, X, X*2, X*3, ...}.  */
  *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
			      build_zero_cst (offset_type), step);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
}
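
/* For example (a sketch): with DR_STEP = 8, gs_info->scale = 4 and
   V4SI vectors, X = 8 / 4 = 2, so *VEC_OFFSET is the series
   { 0, 2, 4, 6 } (scaled by 4 at access time, giving byte offsets
   { 0, 8, 16, 24 }) and *DATAREF_BUMP is 8 * 4 = 32 bytes, the
   distance covered by one vector statement.  */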

/* Return the amount that should be added to a vector pointer to move
   to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */

static tree
vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
			     vect_memory_access_type memory_access_type)
{
  if (memory_access_type == VMAT_INVARIANT)
    return size_zero_node;

  tree iv_step = TYPE_SIZE_UNIT (aggr_type);
  tree step = vect_dr_behavior (dr_info)->step;
  if (tree_int_cst_sgn (step) == -1)
    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
  return iv_step;
}
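
/* For example: for a V4SI AGGR_TYPE the increment is 16 bytes, and
   for an access with a negative step (such as VMAT_CONTIGUOUS_REVERSE)
   it is -16, so the pointer walks downwards through memory.  */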
2922
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */

static bool
vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    stmt_vec_info *vec_stmt, slp_tree slp_node,
		    tree vectype_in, stmt_vector_for_cost *cost_vec)
{
  tree op, vectype;
  gcall *stmt = as_a <gcall *> (stmt_info->stmt);
  vec_info *vinfo = stmt_info->vinfo;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  unsigned ncopies;

  op = gimple_call_arg (stmt, 0);
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
  if (! char_vectype)
    return false;

  poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
  unsigned word_bytes;
  if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
    return false;

  /* The encoding uses one stepped pattern for each byte in the word.  */
  vec_perm_builder elts (num_bytes, word_bytes, 3);
  for (unsigned i = 0; i < 3; ++i)
    for (unsigned j = 0; j < word_bytes; ++j)
      elts.quick_push ((i + 1) * word_bytes - j - 1);
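  /* For a 32-bit bswap, WORD_BYTES is 4, so the three encoded patterns
     expand to the byte permutation { 3, 2, 1, 0, 7, 6, 5, 4,
     11, 10, 9, 8, ... }, which reverses each 4-byte word.  */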

  vec_perm_indices indices (elts, 1, num_bytes);
  if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
    return false;

  if (! vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_bswap");
      if (! slp_node)
	{
	  record_stmt_cost (cost_vec,
			    1, vector_stmt, stmt_info, 0, vect_prologue);
	  record_stmt_cost (cost_vec,
			    ncopies, vec_perm, stmt_info, 0, vect_body);
	}
      return true;
    }

  tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);

  /* Transform.  */
  vec<tree> vec_oprnds = vNULL;
  stmt_vec_info new_stmt_info = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  for (unsigned j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
	vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
      else
	vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      unsigned i;
      tree vop;
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
	{
	  gimple *new_stmt;
	  tree tem = make_ssa_name (char_vectype);
	  new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						       char_vectype, vop));
	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  tree tem2 = make_ssa_name (char_vectype);
	  new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
					  tem, tem, bswap_vconst);
	  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  tem = make_ssa_name (vectype);
	  new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
						       vectype, tem2));
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	}

      if (slp_node)
	continue;

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vec_oprnds.release ();
  return true;
}

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt,
					&interm_types)
      || multi_step_cvt)
    return false;
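
  /* Reaching here means a single pack suffices: e.g. two V4SI inputs
     narrow to one V8HI result with a single VEC_PACK_TRUNC_EXPR, whereas
     narrowing DImode elements to HImode would set MULTI_STEP_CVT and
     fail above.  */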
  *convert_code = code;
  return true;
}

/* Function vectorizable_call.

   Check if STMT_INFO performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   stmt_vec_info *vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info prev_stmt_info;
  tree vectype_out, vectype_in;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  enum vect_def_type dt[4]
    = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
	vect_unknown_def_type };
  int ndts = ARRAY_SIZE (dt);
  int ncopies, j;
  auto_vec<tree, 8> vargs;
  auto_vec<tree, 8> orig_vargs;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT_INFO a vectorizable call?  */
  stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
	  || internal_store_fn_p (gimple_call_internal_fn (stmt))))
    /* Handled by vectorizable_load and vectorizable_store.  */
    return false;

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than four arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 4)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  combined_fn cfn = gimple_call_combined_fn (stmt);
  if (cfn == CFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  int mask_opno = -1;
  if (internal_fn_p (cfn))
    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      /* Skip the mask argument to an internal function.  This operand
	 has been converted via a pattern if necessary.  */
      if ((int) i == mask_opno)
	continue;

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument types differ.\n");
	  return false;
	}
      if (!rhs_type)
	rhs_type = TREE_TYPE (op);

      if (!vectype_in)
	vectype_in = opvectype;
      else if (opvectype
	       && opvectype != vectype_in)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "argument vector types differ.\n");
	  return false;
	}
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n", rhs_type);

      return false;
    }

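  /* Classify the call by comparing lane counts: e.g. a V4SI result from
     V4SI inputs is NONE, a V8HI result from V4SI inputs is NARROW (two
     input vectors feed one output), and a V4SI result from V8HI inputs
     is WIDEN (one input feeds two outputs).  */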
  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (known_eq (nunits_in * 2, nunits_out))
    modifier = NARROW;
  else if (known_eq (nunits_out, nunits_in))
    modifier = NONE;
  else if (known_eq (nunits_out * 2, nunits_in))
    modifier = WIDEN;
  else
    return false;

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
	  || (modifier == NARROW
	      && simple_integer_narrowing (vectype_out, vectype_in,
					   &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
					  vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
	fndecl = targetm.vectorize.builtin_vectorized_function
	  (cfn, vectype_out, vectype_in);
      else if (callee)
	fndecl = targetm.vectorize.builtin_md_vectorized_function
	  (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
	  && !slp_node
	  && loop_vinfo
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
	{
	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
	     { 0, 1, 2, ... vf - 1 } vector.  */
	  gcc_assert (nargs == 0);
	}
      else if (modifier == NONE
	       && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
		   || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
	return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
				   vectype_in, cost_vec);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "function is not vectorizable.\n");
	  return false;
	}
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype_in);

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_call");
      vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
	record_stmt_cost (cost_vec, ncopies / 2,
			  vec_promote_demote, stmt_info, 0, vect_body);

      if (loop_vinfo && mask_opno >= 0)
	{
	  unsigned int nvectors = (slp_node
				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
				   : ncopies);
	  vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
	}
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);

  stmt_vec_info new_stmt_info = NULL;
  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      vargs.safe_grow (nargs);
      orig_vargs.safe_grow (nargs);
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs[i] = gimple_call_arg (stmt, i);
	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
		{
		  size_t k;
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs[k] = vec_oprndsk[i];
		    }
		  if (modifier == NARROW)
		    {
		      /* We don't define any narrowing conditional functions
			 at present.  */
		      gcc_assert (mask_opno < 0);
		      tree half_res = make_ssa_name (vectype_in);
		      gcall *call
			= gimple_build_call_internal_vec (ifn, vargs);
		      gimple_call_set_lhs (call, half_res);
		      gimple_call_set_nothrow (call, true);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, call, gsi);
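		      /* Each narrowed output consumes two input vectors:
			 stash the result for even I and combine it with
			 the next half result when I is odd.  */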
		      if ((i & 1) == 0)
			{
			  prev_res = half_res;
			  continue;
			}
		      new_temp = make_ssa_name (vec_dest);
		      gimple *new_stmt
			= gimple_build_assign (new_temp, convert_code,
					       prev_res, half_res);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
		    }
		  else
		    {
		      if (mask_opno >= 0 && masked_loop_p)
			{
			  unsigned int vec_num = vec_oprnds0.length ();
			  /* Always true for SLP.  */
			  gcc_assert (ncopies == 1);
			  tree mask = vect_get_loop_mask (gsi, masks, vec_num,
							  vectype_out, i);
			  vargs[mask_opno] = prepare_load_store_mask
			    (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
			}

		      gcall *call;
		      if (ifn != IFN_LAST)
			call = gimple_build_call_internal_vec (ifn, vargs);
		      else
			call = gimple_build_call_vec (fndecl, vargs);
		      new_temp = make_ssa_name (vec_dest, call);
		      gimple_call_set_lhs (call, new_temp);
		      gimple_call_set_nothrow (call, true);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, call, gsi);
		    }
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		vec_oprnd0
		  = vect_get_vec_def_for_operand (op, stmt_info);
	      else
		vec_oprnd0
		  = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);

	      orig_vargs[i] = vargs[i] = vec_oprnd0;
	    }

	  if (mask_opno >= 0 && masked_loop_p)
	    {
	      tree mask = vect_get_loop_mask (gsi, masks, ncopies,
					      vectype_out, j);
	      vargs[mask_opno]
		= prepare_load_store_mask (TREE_TYPE (mask), mask,
					   vargs[mask_opno], gsi);
	    }

	  if (cfn == CFN_GOMP_SIMD_LANE)
	    {
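	      /* Copy J of the result is the constant vector
		 { j * nunits_out, j * nunits_out + 1, ... }, i.e. the
		 lane numbers handled by this copy.  */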
	      tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
	      tree new_var
		= vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
	      gimple *init_stmt = gimple_build_assign (new_var, cst);
	      vect_init_vector_1 (stmt_info, init_stmt, NULL);
	      new_temp = make_ssa_name (vec_dest);
	      gimple *new_stmt = gimple_build_assign (new_temp, new_var);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    }
	  else if (modifier == NARROW)
	    {
	      /* We don't define any narrowing conditional functions at
		 present.  */
	      gcc_assert (mask_opno < 0);
	      tree half_res = make_ssa_name (vectype_in);
	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
	      gimple_call_set_lhs (call, half_res);
	      gimple_call_set_nothrow (call, true);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, call, gsi);
	      if ((j & 1) == 0)
		{
		  prev_res = half_res;
		  continue;
		}
	      new_temp = make_ssa_name (vec_dest);
	      gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
						       prev_res, half_res);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	    }
	  else
	    {
	      gcall *call;
	      if (ifn != IFN_LAST)
		call = gimple_build_call_internal_vec (ifn, vargs);
	      else
		call = gimple_build_call_vec (fndecl, vargs);
	      new_temp = make_ssa_name (vec_dest, call);
	      gimple_call_set_lhs (call, new_temp);
	      gimple_call_set_nothrow (call, true);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, call, gsi);
	    }

	  if (j == (modifier == NARROW ? 1 : 0))
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	  prev_stmt_info = new_stmt_info;
	}
    }
  else if (modifier == NARROW)
    {
      /* We don't define any narrowing conditional functions at present.  */
      gcc_assert (mask_opno < 0);
      for (j = 0; j < ncopies; ++j)
	{
	  /* Build argument list for the vectorized call.  */
	  if (j == 0)
	    vargs.create (nargs * 2);
	  else
	    vargs.truncate (0);

	  if (slp_node)
	    {
	      auto_vec<vec<tree> > vec_defs (nargs);
	      vec<tree> vec_oprnds0;

	      for (i = 0; i < nargs; i++)
		vargs.quick_push (gimple_call_arg (stmt, i));
	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
	      vec_oprnds0 = vec_defs[0];

	      /* Arguments are ready.  Create the new vector stmt.  */
	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
		{
		  size_t k;
		  vargs.truncate (0);
		  for (k = 0; k < nargs; k++)
		    {
		      vec<tree> vec_oprndsk = vec_defs[k];
		      vargs.quick_push (vec_oprndsk[i]);
		      vargs.quick_push (vec_oprndsk[i + 1]);
		    }
		  gcall *call;
		  if (ifn != IFN_LAST)
		    call = gimple_build_call_internal_vec (ifn, vargs);
		  else
		    call = gimple_build_call_vec (fndecl, vargs);
		  new_temp = make_ssa_name (vec_dest, call);
		  gimple_call_set_lhs (call, new_temp);
		  gimple_call_set_nothrow (call, true);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, call, gsi);
		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
		}

	      for (i = 0; i < nargs; i++)
		{
		  vec<tree> vec_oprndsi = vec_defs[i];
		  vec_oprndsi.release ();
		}
	      continue;
	    }

	  for (i = 0; i < nargs; i++)
	    {
	      op = gimple_call_arg (stmt, i);
	      if (j == 0)
		{
		  vec_oprnd0
		    = vect_get_vec_def_for_operand (op, stmt_info);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
		}
	      else
		{
		  vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
						2 * i + 1);
		  vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
		  vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
		}

	      vargs.quick_push (vec_oprnd0);
	      vargs.quick_push (vec_oprnd1);
	    }

	  gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_call_set_lhs (new_stmt, new_temp);
	  new_stmt_info
	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	  prev_stmt_info = new_stmt_info;
	}

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  stmt_info = vect_orig_stmt (stmt_info);
  lhs = gimple_get_lhs (stmt_info->stmt);

  gassign *new_stmt
    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
  vinfo->replace_stmt (gsi, stmt_info, new_stmt);

  return true;
}


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
		       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
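  /* OP is BASE p+ V.  Peel constant adjustments and one constant
     multiplication off V; if what remains is GOMP_SIMD_LANE (simduid),
     then e.g. OP = BASE p+ 8 * lane records linear_step = 8.  */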
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
	switch (gimple_assign_rhs_code (def_stmt))
	  {
	  case PLUS_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || TREE_CODE (t) != INTEGER_CST)
	      return;
	    base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  case MULT_EXPR:
	    t = gimple_assign_rhs2 (def_stmt);
	    if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
	      return;
	    linear_step = tree_to_shwi (t);
	    v = gimple_assign_rhs1 (def_stmt);
	    continue;
	  CASE_CONVERT:
	    t = gimple_assign_rhs1 (def_stmt);
	    if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
		|| (TYPE_PRECISION (TREE_TYPE (v))
		    < TYPE_PRECISION (TREE_TYPE (t))))
	      return;
	    if (!linear_step)
	      linear_step = 1;
	    v = t;
	    continue;
	  default:
	    return;
	  }
      else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
	       && loop->simduid
	       && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
	       && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
		   == loop->simduid))
	{
	  if (!linear_step)
	    linear_step = 1;
	  arginfo->linear_step = linear_step;
	  arginfo->op = base;
	  arginfo->simd_lane_linear = true;
	  return;
	}
    }
}

/* Return the number of elements in vector type VECTYPE, which is associated
   with a SIMD clone.  At present these vectors always have a constant
   length.  */

static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
{
  return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
}

/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_simd_clone_call (stmt_vec_info stmt_info,
			      gimple_stmt_iterator *gsi,
			      stmt_vec_info *vec_stmt, slp_tree slp_node,
			      stmt_vector_for_cost *)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;

  /* Is STMT a vectorizable call?  */
  gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
  if (!stmt)
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  /* FORNOW */
  if (slp_node)
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
			       &thisarginfo.vectype)
	  || thisarginfo.dt == vect_uninitialized_def)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (thisarginfo.dt == vect_constant_def
	  || thisarginfo.dt == vect_external_def)
	gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
	gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
	{
	  gcc_assert (vec_stmt);
	  thisarginfo.linear_step
	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
	  thisarginfo.op
	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
	  thisarginfo.simd_lane_linear
	    = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
	       == boolean_true_node);
	  /* If loop has been peeled for alignment, we need to adjust it.  */
	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
	  if (n1 != n2 && !thisarginfo.simd_lane_linear)
	    {
	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
	      tree opt = TREE_TYPE (thisarginfo.op);
	      bias = fold_convert (TREE_TYPE (step), bias);
	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
	      thisarginfo.op
		= fold_build2 (POINTER_TYPE_P (opt)
			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
			       thisarginfo.op, bias);
	    }
	}
      else if (!vec_stmt
	       && thisarginfo.dt != vect_constant_def
	       && thisarginfo.dt != vect_external_def
	       && loop_vinfo
	       && TREE_CODE (op) == SSA_NAME
	       && simple_iv (loop, loop_containing_stmt (stmt), op,
			     &iv, false)
	       && tree_fits_shwi_p (iv.step))
	{
	  thisarginfo.linear_step = tree_to_shwi (iv.step);
	  thisarginfo.op = iv.base;
	}
      else if ((thisarginfo.dt == vect_constant_def
		|| thisarginfo.dt == vect_external_def)
	       && POINTER_TYPE_P (TREE_TYPE (op)))
	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
	 linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
	  && !thisarginfo.linear_step
	  && !vec_stmt
	  && thisarginfo.dt != vect_constant_def
	  && thisarginfo.dt != vect_external_def
	  && loop_vinfo
	  && !slp_node
	  && TREE_CODE (op) == SSA_NAME)
	vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned HOST_WIDE_INT vf;
  if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not considering SIMD clones; not yet supported"
			 " for variable-width vectors.\n");
      return false;
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
	 n = n->simdclone->next_clone)
      {
	unsigned int this_badness = 0;
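	/* Score each usable clone; lower is better.  E.g. a clone whose
	   simdlen is half of VF costs 1024, an inbranch clone costs 2048,
	   and each unit of target-reported badness costs 512.  */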
	if (n->simdclone->simdlen > vf
	    || n->simdclone->nargs != nargs)
	  continue;
	if (n->simdclone->simdlen < vf)
	  this_badness += (exact_log2 (vf)
			   - exact_log2 (n->simdclone->simdlen)) * 1024;
	if (n->simdclone->inbranch)
	  this_badness += 2048;
	int target_badness = targetm.simd_clone.usable (n);
	if (target_badness < 0)
	  continue;
	this_badness += target_badness * 512;
	/* FORNOW: Have to add code to add the mask argument.  */
	if (n->simdclone->inbranch)
	  continue;
	for (i = 0; i < nargs; i++)
	  {
	    switch (n->simdclone->args[i].arg_type)
	      {
	      case SIMD_CLONE_ARG_TYPE_VECTOR:
		if (!useless_type_conversion_p
			(n->simdclone->args[i].orig_type,
			 TREE_TYPE (gimple_call_arg (stmt, i))))
		  i = -1;
		else if (arginfo[i].dt == vect_constant_def
			 || arginfo[i].dt == vect_external_def
			 || arginfo[i].linear_step)
		  this_badness += 64;
		break;
	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
		if (arginfo[i].dt != vect_constant_def
		    && arginfo[i].dt != vect_external_def)
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
		if (arginfo[i].dt == vect_constant_def
		    || arginfo[i].dt == vect_external_def
		    || (arginfo[i].linear_step
			!= n->simdclone->args[i].linear_step))
		  i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	      case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
		/* FORNOW */
		i = -1;
		break;
	      case SIMD_CLONE_ARG_TYPE_MASK:
		gcc_unreachable ();
	      }
	    if (i == (size_t) -1)
	      break;
	    if (n->simdclone->args[i].alignment > arginfo[i].align)
	      {
		i = -1;
		break;
	      }
	    if (arginfo[i].align)
	      this_badness += (exact_log2 (arginfo[i].align)
			       - exact_log2 (n->simdclone->args[i].alignment));
	  }
	if (i == (size_t) -1)
	  continue;
	if (bestn == NULL || this_badness < badness)
	  {
	    bestn = n;
	    badness = this_badness;
	  }
      }

  if (bestn == NULL)
    return false;

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
	 || arginfo[i].dt == vect_external_def)
	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
	arginfo[i].vectype
	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
								     i)));
	if (arginfo[i].vectype == NULL
	    || (simd_clone_subparts (arginfo[i].vectype)
		> bestn->simdclone->simdlen))
	  return false;
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = vf / nunits;
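  /* E.g. with VF 8 and a clone of simdlen 4, each scalar call maps to
     NCOPIES == 2 calls of the clone per vector iteration.  */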

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    return false;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
	if ((bestn->simdclone->args[i].arg_type
	     == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
	    || (bestn->simdclone->args[i].arg_type
		== SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
	  {
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
								      + 1);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
		       ? size_type_node : TREE_TYPE (arginfo[i].op);
	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
	    tree sll = arginfo[i].simd_lane_linear
		       ? boolean_true_node : boolean_false_node;
	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
	  }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      if (TREE_CODE (rtype) == ARRAY_TYPE)
	{
	  ratype = rtype;
	  rtype = TREE_TYPE (ratype);
	}
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
	vargs.create (nargs);
      else
	vargs.truncate (0);

      for (i = 0; i < nargs; i++)
	{
	  unsigned int k, l, m, o;
	  tree atype;
	  op = gimple_call_arg (stmt, i);
	  switch (bestn->simdclone->args[i].arg_type)
	    {
	    case SIMD_CLONE_ARG_TYPE_VECTOR:
	      atype = bestn->simdclone->args[i].vector_type;
	      o = nunits / simd_clone_subparts (atype);
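	      /* Each call passes this argument as O vectors of type ATYPE.
		 If ATYPE is narrower than the vectorized operand, pieces
		 are extracted with BIT_FIELD_REF; if it is wider, several
		 operand vectors are glued together with a CONSTRUCTOR.  */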
	      for (m = j * o; m < (j + 1) * o; m++)
		{
		  if (simd_clone_subparts (atype)
		      < simd_clone_subparts (arginfo[i].vectype))
		    {
		      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
		      k = (simd_clone_subparts (arginfo[i].vectype)
			   / simd_clone_subparts (atype));
		      gcc_assert ((k & (k - 1)) == 0);
		      if (m == 0)
			vec_oprnd0
			  = vect_get_vec_def_for_operand (op, stmt_info);
		      else
			{
			  vec_oprnd0 = arginfo[i].op;
			  if ((m & (k - 1)) == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (vinfo,
								vec_oprnd0);
			}
		      arginfo[i].op = vec_oprnd0;
		      vec_oprnd0
			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
				  bitsize_int (prec),
				  bitsize_int ((m & (k - 1)) * prec));
		      gassign *new_stmt
			= gimple_build_assign (make_ssa_name (atype),
					       vec_oprnd0);
		      vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		      vargs.safe_push (gimple_assign_lhs (new_stmt));
		    }
		  else
		    {
		      k = (simd_clone_subparts (atype)
			   / simd_clone_subparts (arginfo[i].vectype));
		      gcc_assert ((k & (k - 1)) == 0);
		      vec<constructor_elt, va_gc> *ctor_elts;
		      if (k != 1)
			vec_alloc (ctor_elts, k);
		      else
			ctor_elts = NULL;
		      for (l = 0; l < k; l++)
			{
			  if (m == 0 && l == 0)
			    vec_oprnd0
			      = vect_get_vec_def_for_operand (op, stmt_info);
			  else
			    vec_oprnd0
			      = vect_get_vec_def_for_stmt_copy (vinfo,
								arginfo[i].op);
			  arginfo[i].op = vec_oprnd0;
			  if (k == 1)
			    break;
			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
						  vec_oprnd0);
			}
		      if (k == 1)
			vargs.safe_push (vec_oprnd0);
		      else
			{
			  vec_oprnd0 = build_constructor (atype, ctor_elts);
			  gassign *new_stmt
			    = gimple_build_assign (make_ssa_name (atype),
						   vec_oprnd0);
			  vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
			  vargs.safe_push (gimple_assign_lhs (new_stmt));
			}
		    }
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
	      vargs.safe_push (op);
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
	      if (j == 0)
		{
		  gimple_seq stmts;
		  arginfo[i].op
		    = force_gimple_operand (arginfo[i].op, &stmts, true,
					    NULL_TREE);
		  if (stmts != NULL)
		    {
		      basic_block new_bb;
		      edge pe = loop_preheader_edge (loop);
		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
		      gcc_assert (!new_bb);
		    }
		  if (arginfo[i].simd_lane_linear)
		    {
		      vargs.safe_push (arginfo[i].op);
		      break;
		    }
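		  /* Materialize the linear argument as a loop-header PHI
		     that advances by linear_step * ncopies * nunits each
		     vector iteration; later copies within one iteration
		     add j * nunits * linear_step below.  */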
		  tree phi_res = copy_ssa_name (op);
		  gphi *new_phi = create_phi_node (phi_res, loop->header);
		  loop_vinfo->add_stmt (new_phi);
		  add_phi_arg (new_phi, arginfo[i].op,
			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       ncopies * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  tree phi_arg = copy_ssa_name (op);
		  gassign *new_stmt
		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
		  loop_vinfo->add_stmt (new_stmt);
		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
			       UNKNOWN_LOCATION);
		  arginfo[i].op = phi_res;
		  vargs.safe_push (phi_res);
		}
	      else
		{
		  enum tree_code code
		    = POINTER_TYPE_P (TREE_TYPE (op))
		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
			      ? sizetype : TREE_TYPE (op);
		  widest_int cst
		    = wi::mul (bestn->simdclone->args[i].linear_step,
			       j * nunits);
		  tree tcst = wide_int_to_tree (type, cst);
		  new_temp = make_ssa_name (TREE_TYPE (op));
		  gassign *new_stmt
		    = gimple_build_assign (new_temp, code,
					   arginfo[i].op, tcst);
		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		  vargs.safe_push (new_temp);
		}
	      break;
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
	    case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
	    default:
	      gcc_unreachable ();
	    }
	}

      gcall *new_call = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
	{
	  gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
	  if (ratype)
	    new_temp = create_tmp_var (ratype);
	  else if (simd_clone_subparts (vectype)
		   == simd_clone_subparts (rtype))
	    new_temp = make_ssa_name (vec_dest, new_call);
	  else
	    new_temp = make_ssa_name (rtype, new_call);
	  gimple_call_set_lhs (new_call, new_temp);
	}
      stmt_vec_info new_stmt_info
	= vect_finish_stmt_generation (stmt_info, new_call, gsi);

      if (vec_dest)
	{
	  if (simd_clone_subparts (vectype) < nunits)
	    {
	      unsigned int k, l;
	      poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
	      poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
	      k = nunits / simd_clone_subparts (vectype);
	      gcc_assert ((k & (k - 1)) == 0);
	      for (l = 0; l < k; l++)
		{
		  tree t;
		  if (ratype)
		    {
		      t = build_fold_addr_expr (new_temp);
		      t = build2 (MEM_REF, vectype, t,
				  build_int_cst (TREE_TYPE (t), l * bytes));
		    }
		  else
		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
				bitsize_int (prec), bitsize_int (l * prec));
		  gimple *new_stmt
		    = gimple_build_assign (make_ssa_name (vectype), t);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

		  if (j == 0 && l == 0)
		    STMT_VINFO_VEC_STMT (stmt_info)
		      = *vec_stmt = new_stmt_info;
		  else
		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

		  prev_stmt_info = new_stmt_info;
		}

	      if (ratype)
		vect_clobber_variable (stmt_info, gsi, new_temp);
	      continue;
	    }
	  else if (simd_clone_subparts (vectype) > nunits)
	    {
	      unsigned int k = (simd_clone_subparts (vectype)
				/ simd_clone_subparts (rtype));
	      gcc_assert ((k & (k - 1)) == 0);
	      if ((j & (k - 1)) == 0)
		vec_alloc (ret_ctor_elts, k);
	      if (ratype)
		{
		  unsigned int m, o = nunits / simd_clone_subparts (rtype);
		  for (m = 0; m < o; m++)
		    {
		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
					 size_int (m), NULL_TREE, NULL_TREE);
		      gimple *new_stmt
			= gimple_build_assign (make_ssa_name (rtype), tem);
		      new_stmt_info
			= vect_finish_stmt_generation (stmt_info, new_stmt,
						       gsi);
		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
					      gimple_assign_lhs (new_stmt));
		    }
		  vect_clobber_variable (stmt_info, gsi, new_temp);
		}
	      else
		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
	      if ((j & (k - 1)) != k - 1)
		continue;
	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
	      gimple *new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

	      if ((unsigned) j == k - 1)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

	      prev_stmt_info = new_stmt_info;
	      continue;
	    }
	  else if (ratype)
	    {
	      tree t = build_fold_addr_expr (new_temp);
	      t = build2 (MEM_REF, vectype, t,
			  build_int_cst (TREE_TYPE (t), 0));
	      gimple *new_stmt
		= gimple_build_assign (make_ssa_name (vec_dest), t);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      vect_clobber_variable (stmt_info, gsi, new_temp);
	    }
	}

      if (j == 0)
	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
      else
	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;

      prev_stmt_info = new_stmt_info;
    }

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  gimple *new_stmt;
  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
  unlink_stmt_vdef (stmt);

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */

static gimple *
vect_gen_widened_results_half (enum tree_code code,
			       tree decl,
			       tree vec_oprnd0, tree vec_oprnd1, int op_type,
			       tree vec_dest, gimple_stmt_iterator *gsi,
			       stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
	vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

  return new_stmt;
}


/* Get vectorized definitions for loop-based vectorization of STMT_INFO.
   For the first operand we call vect_get_vec_def_for_operand (with OPRND
   containing scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
			  vec<tree> *vec_oprnds, int multi_step_cvt)
{
  vec_info *vinfo = stmt_info->vinfo;
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);
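  /* Each level pushes two defs, so with MULTI_STEP_CVT == 1 the recursion
     below collects four vector defs in total.  */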

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
			      multi_step_cvt - 1);
}


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
				       int multi_step_cvt,
				       stmt_vec_info stmt_info,
				       vec<tree> vec_dsts,
				       gimple_stmt_iterator *gsi,
				       slp_tree slp_node, enum tree_code code,
				       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      stmt_vec_info new_stmt_info
	= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

      if (multi_step_cvt)
	/* Store the resulting vector for next recursive call.  */
	(*vec_oprnds)[i/2] = new_tmp;
      else
	{
	  /* This is the last step of the conversion sequence.  Store the
	     vectors in SLP_NODE or in vector info of the scalar statement
	     (or in STMT_VINFO_RELATED_STMT chain).  */
	  if (slp_node)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	  else
	    {
	      if (!*prev_stmt_info)
		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;

	      *prev_stmt_info = new_stmt_info;
	    }
	}
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
	 previous level.  */
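      /* E.g. four V4SI operands are first packed pairwise into two V8HI
	 vectors, which the recursive call below packs into one V16QI
	 vector.  */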
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
					     stmt_info, vec_dsts, gsi,
					     slp_node, VEC_PACK_TRUNC_EXPR,
					     prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
					vec<tree> *vec_oprnds1,
					stmt_vec_info stmt_info, tree vec_dest,
					gimple_stmt_iterator *gsi,
					enum tree_code code1,
					enum tree_code code2, tree decl1,
					tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple *new_stmt1, *new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
	vop1 = (*vec_oprnds1)[i];
      else
	vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
4527 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
82570274
RS
4528 op_type, vec_dest, gsi,
4529 stmt_info);
4a00c761 4530 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
82570274
RS
4531 op_type, vec_dest, gsi,
4532 stmt_info);
4a00c761
JJ
4533 if (is_gimple_call (new_stmt1))
4534 {
4535 new_tmp1 = gimple_call_lhs (new_stmt1);
4536 new_tmp2 = gimple_call_lhs (new_stmt2);
4537 }
4538 else
4539 {
4540 new_tmp1 = gimple_assign_lhs (new_stmt1);
4541 new_tmp2 = gimple_assign_lhs (new_stmt2);
4542 }
4543
4544 /* Store the results for the next step. */
9771b263
DN
4545 vec_tmp.quick_push (new_tmp1);
4546 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4547 }
4548
689eaba3 4549 vec_oprnds0->release ();
4a00c761
JJ
4550 *vec_oprnds0 = vec_tmp;
4551}
4552
4553
32e8e429
RS
4554/* Check if STMT_INFO performs a conversion operation that can be vectorized.
4555 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4a00c761 4556 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
32e8e429 4557 Return true if STMT_INFO is vectorizable in this way. */
ebfd146a
IR
4558
4559static bool
32e8e429 4560vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
1eede195 4561 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 4562 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4563{
4564 tree vec_dest;
4565 tree scalar_dest;
4a00c761 4566 tree op0, op1 = NULL_TREE;
ebfd146a 4567 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
ebfd146a
IR
4568 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4569 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4570 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4571 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4572 tree new_temp;
ebfd146a 4573 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4574 int ndts = 2;
ebfd146a 4575 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4576 poly_uint64 nunits_in;
4577 poly_uint64 nunits_out;
ebfd146a 4578 tree vectype_out, vectype_in;
4a00c761
JJ
4579 int ncopies, i, j;
4580 tree lhs_type, rhs_type;
ebfd146a 4581 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4582 vec<tree> vec_oprnds0 = vNULL;
4583 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4584 tree vop0;
4a00c761 4585 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4586 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4587 int multi_step_cvt = 0;
6e1aa848 4588 vec<tree> interm_types = vNULL;
4a00c761
JJ
4589 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4590 int op_type;
4a00c761 4591 unsigned short fltsz;
ebfd146a
IR
4592
4593 /* Is STMT a vectorizable conversion? */
4594
4a00c761 4595 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4596 return false;
4597
66c16fd9
RB
4598 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4599 && ! vec_stmt)
ebfd146a
IR
4600 return false;
4601
32e8e429
RS
4602 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4603 if (!stmt)
ebfd146a
IR
4604 return false;
4605
4606 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4607 return false;
4608
4609 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4610 if (!CONVERT_EXPR_CODE_P (code)
4611 && code != FIX_TRUNC_EXPR
4612 && code != FLOAT_EXPR
4613 && code != WIDEN_MULT_EXPR
4614 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4615 return false;
4616
4a00c761
JJ
4617 op_type = TREE_CODE_LENGTH (code);
4618
ebfd146a 4619 /* Check types of lhs and rhs. */
b690cc0f 4620 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4621 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4622 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4623
ebfd146a
IR
4624 op0 = gimple_assign_rhs1 (stmt);
4625 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4626
4627 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4628 && !((INTEGRAL_TYPE_P (lhs_type)
4629 && INTEGRAL_TYPE_P (rhs_type))
4630 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4631 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4632 return false;
4633
e6f5c25d
IE
4634 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4635 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4636 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4637 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4638 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4639 {
73fbfcad 4640 if (dump_enabled_p ())
78c60e3d 4641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4642 "type conversion to/from bit-precision unsupported."
4643 "\n");
4a00c761
JJ
4644 return false;
4645 }
4646
b690cc0f 4647 /* Check the operands of the operation. */
894dd753 4648 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
b690cc0f 4649 {
73fbfcad 4650 if (dump_enabled_p ())
78c60e3d 4651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4652 "use not simple.\n");
b690cc0f
RG
4653 return false;
4654 }
4a00c761
JJ
4655 if (op_type == binary_op)
4656 {
4657 bool ok;
4658
4659 op1 = gimple_assign_rhs2 (stmt);
4660 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4661 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4662 OP1. */
4663 if (CONSTANT_CLASS_P (op0))
894dd753 4664 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4a00c761 4665 else
894dd753 4666 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4a00c761
JJ
4667
4668 if (!ok)
4669 {
73fbfcad 4670 if (dump_enabled_p ())
78c60e3d 4671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4672 "use not simple.\n");
4a00c761
JJ
4673 return false;
4674 }
4675 }
4676
b690cc0f
RG
4677 /* If op0 is an external or constant defs use a vector type of
4678 the same size as the output vector type. */
ebfd146a 4679 if (!vectype_in)
b690cc0f 4680 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4681 if (vec_stmt)
4682 gcc_assert (vectype_in);
4683 if (!vectype_in)
4684 {
73fbfcad 4685 if (dump_enabled_p ())
3c2a8ed0
DM
4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4687 "no vectype for scalar type %T\n", rhs_type);
7d8930a0
IR
4688
4689 return false;
4690 }
ebfd146a 4691
e6f5c25d
IE
4692 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4693 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4694 {
4695 if (dump_enabled_p ())
3c2a8ed0
DM
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "can't convert between boolean and non "
4698 "boolean vectors %T\n", rhs_type);
e6f5c25d
IE
4699
4700 return false;
4701 }
4702
b690cc0f
RG
4703 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4704 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4705 if (known_eq (nunits_out, nunits_in))
ebfd146a 4706 modifier = NONE;
062d5ccc
RS
4707 else if (multiple_p (nunits_out, nunits_in))
4708 modifier = NARROW;
ebfd146a 4709 else
062d5ccc
RS
4710 {
4711 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4712 modifier = WIDEN;
4713 }
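  /* For example (an illustrative sketch; vector modes are target-dependent):
     with 128-bit vectors, int -> float keeps the unit count
     (V4SI -> V4SF, NONE); int -> short produces more, narrower units
     (V4SI in, V8HI out, NARROW); short -> int is the reverse
     (V8HI in, V4SI out, WIDEN).  */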
ebfd146a 4714
ff802fa1
IR
4715 /* Multiple types in SLP are handled by creating the appropriate number of
4716 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4717 case of SLP. */
fce57248 4718 if (slp_node)
ebfd146a 4719 ncopies = 1;
4a00c761 4720 else if (modifier == NARROW)
e8f142e2 4721 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4722 else
e8f142e2 4723 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4724
ebfd146a
IR
4725 /* Sanity check: make sure that at least one copy of the vectorized stmt
4726 needs to be generated. */
4727 gcc_assert (ncopies >= 1);
4728
16d22000
RS
4729 bool found_mode = false;
4730 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4731 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4732 opt_scalar_mode rhs_mode_iter;
b397965c 4733
ebfd146a 4734 /* Supportable by target? */
4a00c761 4735 switch (modifier)
ebfd146a 4736 {
4a00c761
JJ
4737 case NONE:
4738 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4739 return false;
4740 if (supportable_convert_operation (code, vectype_out, vectype_in,
4741 &decl1, &code1))
4742 break;
4743 /* FALLTHRU */
4744 unsupported:
73fbfcad 4745 if (dump_enabled_p ())
78c60e3d 4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4747 "conversion not supported by target.\n");
ebfd146a 4748 return false;
ebfd146a 4749
4a00c761 4750 case WIDEN:
86a91c0a
RS
4751 if (supportable_widening_operation (code, stmt_info, vectype_out,
4752 vectype_in, &code1, &code2,
4753 &multi_step_cvt, &interm_types))
4a00c761
JJ
4754 {
 4755	  /* A binary widening operation can only be supported directly by the
 4756	     architecture.  */
4757 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4758 break;
4759 }
4760
4761 if (code != FLOAT_EXPR
b397965c 4762 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4763 goto unsupported;
4764
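      /* An illustrative sketch (modes are target-dependent): for
	 double_d = (double) int_i, when no direct widening conversion
	 exists, the loop below searches for an integer mode to widen
	 through; with rhs_mode == DImode it would widen each V4SI to two
	 V2DI halves and then convert each V2DI to V2DF.  */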
b397965c 4765 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4766 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4767 {
16d22000 4768 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4769 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4770 break;
4771
4a00c761
JJ
4772 cvt_type
4773 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4774 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4775 if (cvt_type == NULL_TREE)
4776 goto unsupported;
4777
4778 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4779 {
4780 if (!supportable_convert_operation (code, vectype_out,
4781 cvt_type, &decl1, &codecvt1))
4782 goto unsupported;
4783 }
86a91c0a
RS
4784 else if (!supportable_widening_operation (code, stmt_info,
4785 vectype_out, cvt_type,
4786 &codecvt1, &codecvt2,
4787 &multi_step_cvt,
4a00c761
JJ
4788 &interm_types))
4789 continue;
4790 else
4791 gcc_assert (multi_step_cvt == 0);
4792
86a91c0a 4793 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
a86ec597
RH
4794 vectype_in, &code1, &code2,
4795 &multi_step_cvt, &interm_types))
16d22000
RS
4796 {
4797 found_mode = true;
4798 break;
4799 }
4a00c761
JJ
4800 }
4801
16d22000 4802 if (!found_mode)
4a00c761
JJ
4803 goto unsupported;
4804
4805 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4806 codecvt2 = ERROR_MARK;
4807 else
4808 {
4809 multi_step_cvt++;
9771b263 4810 interm_types.safe_push (cvt_type);
4a00c761
JJ
4811 cvt_type = NULL_TREE;
4812 }
4813 break;
4814
4815 case NARROW:
4816 gcc_assert (op_type == unary_op);
4817 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4818 &code1, &multi_step_cvt,
4819 &interm_types))
4820 break;
4821
4822 if (code != FIX_TRUNC_EXPR
b397965c 4823 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4824 goto unsupported;
4825
4a00c761
JJ
4826 cvt_type
4827 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4828 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4829 if (cvt_type == NULL_TREE)
4830 goto unsupported;
4831 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4832 &decl1, &codecvt1))
4833 goto unsupported;
4834 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4835 &code1, &multi_step_cvt,
4836 &interm_types))
4837 break;
4838 goto unsupported;
4839
4840 default:
4841 gcc_unreachable ();
ebfd146a
IR
4842 }
4843
4844 if (!vec_stmt) /* transformation not required. */
4845 {
adac3a68 4846 DUMP_VECT_SCOPE ("vectorizable_conversion");
4a00c761 4847 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4848 {
4849 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4850 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4851 cost_vec);
8bd37302 4852 }
4a00c761
JJ
4853 else if (modifier == NARROW)
4854 {
4855 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4856 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4857 cost_vec);
4a00c761
JJ
4858 }
4859 else
4860 {
4861 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4862 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4863 cost_vec);
4a00c761 4864 }
9771b263 4865 interm_types.release ();
ebfd146a
IR
4866 return true;
4867 }
4868
67b8dbac 4869 /* Transform. */
73fbfcad 4870 if (dump_enabled_p ())
78c60e3d 4871 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4872 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4873
4a00c761
JJ
4874 if (op_type == binary_op)
4875 {
4876 if (CONSTANT_CLASS_P (op0))
4877 op0 = fold_convert (TREE_TYPE (op1), op0);
4878 else if (CONSTANT_CLASS_P (op1))
4879 op1 = fold_convert (TREE_TYPE (op0), op1);
4880 }
4881
 4882	  /* In case of multi-step conversion, we first generate conversion operations
 4883	     to the intermediate types, and then from those types to the final one.
 4884	     We create vector destinations for the intermediate types (TYPES) received
 4885	     from supportable_*_operation, and store them in the correct order
 4886	     for future use in vect_create_vectorized_*_stmts ().  */
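  /* A sketch, assuming a two-step int -> char demotion with no separate
     CVT_TYPE: vec_dsts ends up as { char_dest, short_dest }, so the
     pop () in vect_create_vectorized_demotion_stmts yields the short
     destination for the first step and the recursive call then pops the
     final char destination.  */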
8c681247 4887 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4888 vec_dest = vect_create_destination_var (scalar_dest,
4889 (cvt_type && modifier == WIDEN)
4890 ? cvt_type : vectype_out);
9771b263 4891 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4892
4893 if (multi_step_cvt)
4894 {
9771b263
DN
4895 for (i = interm_types.length () - 1;
4896 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4897 {
4898 vec_dest = vect_create_destination_var (scalar_dest,
4899 intermediate_type);
9771b263 4900 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4901 }
4902 }
ebfd146a 4903
4a00c761 4904 if (cvt_type)
82294ec1
JJ
4905 vec_dest = vect_create_destination_var (scalar_dest,
4906 modifier == WIDEN
4907 ? vectype_out : cvt_type);
4a00c761
JJ
4908
4909 if (!slp_node)
4910 {
30862efc 4911 if (modifier == WIDEN)
4a00c761 4912 {
c3284718 4913 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4914 if (op_type == binary_op)
9771b263 4915 vec_oprnds1.create (1);
4a00c761 4916 }
30862efc 4917 else if (modifier == NARROW)
9771b263
DN
4918 vec_oprnds0.create (
4919 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4920 }
4921 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4922 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4923
4a00c761 4924 last_oprnd = op0;
ebfd146a
IR
4925 prev_stmt_info = NULL;
4926 switch (modifier)
4927 {
4928 case NONE:
4929 for (j = 0; j < ncopies; j++)
4930 {
ebfd146a 4931 if (j == 0)
86a91c0a
RS
4932 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4933 NULL, slp_node);
ebfd146a 4934 else
e4057a39 4935 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
ebfd146a 4936
9771b263 4937 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761 4938 {
e1bd7296 4939 stmt_vec_info new_stmt_info;
4a00c761
JJ
4940 /* Arguments are ready, create the new vector stmt. */
4941 if (code1 == CALL_EXPR)
4942 {
e1bd7296 4943 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
4944 new_temp = make_ssa_name (vec_dest, new_stmt);
4945 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296 4946 new_stmt_info
86a91c0a 4947 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4a00c761
JJ
4948 }
4949 else
4950 {
4951 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
e1bd7296
RS
4952 gassign *new_stmt
4953 = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4954 new_temp = make_ssa_name (vec_dest, new_stmt);
4955 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 4956 new_stmt_info
86a91c0a 4957 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4a00c761
JJ
4958 }
4959
4a00c761 4960 if (slp_node)
e1bd7296 4961 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
225ce44b
RB
4962 else
4963 {
4964 if (!prev_stmt_info)
e1bd7296
RS
4965 STMT_VINFO_VEC_STMT (stmt_info)
4966 = *vec_stmt = new_stmt_info;
225ce44b 4967 else
e1bd7296
RS
4968 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4969 prev_stmt_info = new_stmt_info;
225ce44b 4970 }
4a00c761 4971 }
ebfd146a
IR
4972 }
4973 break;
4974
4975 case WIDEN:
4976 /* In case the vectorization factor (VF) is bigger than the number
4977 of elements that we can fit in a vectype (nunits), we have to
 4978	       generate more than one vector stmt - i.e., we need to "unroll"
4979 the vector stmt by a factor VF/nunits. */
4980 for (j = 0; j < ncopies; j++)
4981 {
4a00c761 4982 /* Handle uses. */
ebfd146a 4983 if (j == 0)
4a00c761
JJ
4984 {
4985 if (slp_node)
4986 {
4987 if (code == WIDEN_LSHIFT_EXPR)
4988 {
4989 unsigned int k;
ebfd146a 4990
4a00c761
JJ
4991 vec_oprnd1 = op1;
4992 /* Store vec_oprnd1 for every vector stmt to be created
4993 for SLP_NODE. We check during the analysis that all
4994 the shift arguments are the same. */
4995 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4996 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761 4997
86a91c0a
RS
4998 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
4999 &vec_oprnds0, NULL, slp_node);
4a00c761
JJ
5000 }
5001 else
86a91c0a 5002 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
306b0c92 5003 &vec_oprnds1, slp_node);
4a00c761
JJ
5004 }
5005 else
5006 {
86a91c0a 5007 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
9771b263 5008 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5009 if (op_type == binary_op)
5010 {
5011 if (code == WIDEN_LSHIFT_EXPR)
5012 vec_oprnd1 = op1;
5013 else
86a91c0a
RS
5014 vec_oprnd1
5015 = vect_get_vec_def_for_operand (op1, stmt_info);
9771b263 5016 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5017 }
5018 }
5019 }
ebfd146a 5020 else
4a00c761 5021 {
e4057a39 5022 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
9771b263
DN
5023 vec_oprnds0.truncate (0);
5024 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5025 if (op_type == binary_op)
5026 {
5027 if (code == WIDEN_LSHIFT_EXPR)
5028 vec_oprnd1 = op1;
5029 else
e4057a39 5030 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
4a00c761 5031 vec_oprnd1);
9771b263
DN
5032 vec_oprnds1.truncate (0);
5033 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5034 }
5035 }
ebfd146a 5036
4a00c761
JJ
5037 /* Arguments are ready. Create the new vector stmts. */
5038 for (i = multi_step_cvt; i >= 0; i--)
5039 {
9771b263 5040 tree this_dest = vec_dsts[i];
4a00c761
JJ
5041 enum tree_code c1 = code1, c2 = code2;
5042 if (i == 0 && codecvt2 != ERROR_MARK)
5043 {
5044 c1 = codecvt1;
5045 c2 = codecvt2;
5046 }
5047 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
86a91c0a
RS
5048 &vec_oprnds1, stmt_info,
5049 this_dest, gsi,
4a00c761
JJ
5050 c1, c2, decl1, decl2,
5051 op_type);
5052 }
5053
9771b263 5054 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761 5055 {
e1bd7296 5056 stmt_vec_info new_stmt_info;
4a00c761
JJ
5057 if (cvt_type)
5058 {
5059 if (codecvt1 == CALL_EXPR)
5060 {
e1bd7296 5061 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
5062 new_temp = make_ssa_name (vec_dest, new_stmt);
5063 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296 5064 new_stmt_info
86a91c0a
RS
5065 = vect_finish_stmt_generation (stmt_info, new_stmt,
5066 gsi);
4a00c761
JJ
5067 }
5068 else
5069 {
5070 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5071 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
5072 gassign *new_stmt
5073 = gimple_build_assign (new_temp, codecvt1, vop0);
5074 new_stmt_info
86a91c0a
RS
5075 = vect_finish_stmt_generation (stmt_info, new_stmt,
5076 gsi);
4a00c761 5077 }
4a00c761
JJ
5078 }
5079 else
e1bd7296 5080 new_stmt_info = vinfo->lookup_def (vop0);
4a00c761
JJ
5081
5082 if (slp_node)
e1bd7296 5083 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4a00c761 5084 else
c689ce1e
RB
5085 {
5086 if (!prev_stmt_info)
e1bd7296 5087 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
c689ce1e 5088 else
e1bd7296
RS
5089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5090 prev_stmt_info = new_stmt_info;
c689ce1e 5091 }
4a00c761 5092 }
ebfd146a 5093 }
4a00c761
JJ
5094
5095 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5096 break;
5097
5098 case NARROW:
5099 /* In case the vectorization factor (VF) is bigger than the number
5100 of elements that we can fit in a vectype (nunits), we have to
 5101	       generate more than one vector stmt - i.e., we need to "unroll"
5102 the vector stmt by a factor VF/nunits. */
5103 for (j = 0; j < ncopies; j++)
5104 {
5105 /* Handle uses. */
4a00c761 5106 if (slp_node)
86a91c0a 5107 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
306b0c92 5108 slp_node);
ebfd146a
IR
5109 else
5110 {
9771b263 5111 vec_oprnds0.truncate (0);
e4057a39 5112 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
4a00c761 5113 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5114 }
5115
4a00c761
JJ
5116 /* Arguments are ready. Create the new vector stmts. */
5117 if (cvt_type)
9771b263 5118 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5119 {
5120 if (codecvt1 == CALL_EXPR)
5121 {
e1bd7296 5122 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
5123 new_temp = make_ssa_name (vec_dest, new_stmt);
5124 gimple_call_set_lhs (new_stmt, new_temp);
86a91c0a 5125 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4a00c761
JJ
5126 }
5127 else
5128 {
5129 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5130 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
5131 gassign *new_stmt
5132 = gimple_build_assign (new_temp, codecvt1, vop0);
86a91c0a 5133 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4a00c761 5134 }
ebfd146a 5135
9771b263 5136 vec_oprnds0[i] = new_temp;
4a00c761 5137 }
ebfd146a 5138
4a00c761 5139 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
86a91c0a 5140 stmt_info, vec_dsts, gsi,
4a00c761
JJ
5141 slp_node, code1,
5142 &prev_stmt_info);
ebfd146a
IR
5143 }
5144
5145 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5146 break;
ebfd146a
IR
5147 }
5148
9771b263
DN
5149 vec_oprnds0.release ();
5150 vec_oprnds1.release ();
9771b263 5151 interm_types.release ();
ebfd146a
IR
5152
5153 return true;
5154}
ff802fa1
IR
5155
5156
ebfd146a
IR
5157/* Function vectorizable_assignment.
5158
32e8e429
RS
5159 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5160 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5161 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5162 Return true if STMT_INFO is vectorizable in this way. */
ebfd146a
IR
5163
5164static bool
32e8e429 5165vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
1eede195 5166 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5167 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5168{
5169 tree vec_dest;
5170 tree scalar_dest;
5171 tree op;
ebfd146a
IR
5172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5173 tree new_temp;
4fc5ebf1
JG
5174 enum vect_def_type dt[1] = {vect_unknown_def_type};
5175 int ndts = 1;
ebfd146a 5176 int ncopies;
f18b55bd 5177 int i, j;
6e1aa848 5178 vec<tree> vec_oprnds = vNULL;
ebfd146a 5179 tree vop;
a70d6342 5180 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5181 vec_info *vinfo = stmt_info->vinfo;
f18b55bd 5182 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5183 enum tree_code code;
5184 tree vectype_in;
ebfd146a 5185
a70d6342 5186 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5187 return false;
5188
66c16fd9
RB
5189 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5190 && ! vec_stmt)
ebfd146a
IR
5191 return false;
5192
5193 /* Is vectorizable assignment? */
32e8e429
RS
5194 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5195 if (!stmt)
ebfd146a
IR
5196 return false;
5197
5198 scalar_dest = gimple_assign_lhs (stmt);
5199 if (TREE_CODE (scalar_dest) != SSA_NAME)
5200 return false;
5201
fde9c428 5202 code = gimple_assign_rhs_code (stmt);
ebfd146a 5203 if (gimple_assign_single_p (stmt)
fde9c428
RG
5204 || code == PAREN_EXPR
5205 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5206 op = gimple_assign_rhs1 (stmt);
5207 else
5208 return false;
5209
7b7ec6c5
RG
5210 if (code == VIEW_CONVERT_EXPR)
5211 op = TREE_OPERAND (op, 0);
5212
465c8c19 5213 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5214 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5215
5216 /* Multiple types in SLP are handled by creating the appropriate number of
5217 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5218 case of SLP. */
fce57248 5219 if (slp_node)
465c8c19
JJ
5220 ncopies = 1;
5221 else
e8f142e2 5222 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5223
5224 gcc_assert (ncopies >= 1);
5225
894dd753 5226 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
ebfd146a 5227 {
73fbfcad 5228 if (dump_enabled_p ())
78c60e3d 5229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5230 "use not simple.\n");
ebfd146a
IR
5231 return false;
5232 }
5233
fde9c428
RG
5234 /* We can handle NOP_EXPR conversions that do not change the number
5235 of elements or the vector size. */
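  /* For example, int -> unsigned int, or a VIEW_CONVERT_EXPR between
     int and float vectors of the same size, becomes a plain vector copy,
     whereas int -> long changes the unit size and is rejected here.  */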
7b7ec6c5
RG
5236 if ((CONVERT_EXPR_CODE_P (code)
5237 || code == VIEW_CONVERT_EXPR)
fde9c428 5238 && (!vectype_in
928686b1 5239 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5240 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5241 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5242 return false;
5243
7b7b1813
RG
5244 /* We do not handle bit-precision changes. */
5245 if ((CONVERT_EXPR_CODE_P (code)
5246 || code == VIEW_CONVERT_EXPR)
5247 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5248 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5249 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5250 /* But a conversion that does not change the bit-pattern is ok. */
5251 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5252 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5253 && TYPE_UNSIGNED (TREE_TYPE (op)))
5254 /* Conversion between boolean types of different sizes is
 5255	     a simple assignment in case their vectypes are the same
5256 boolean vectors. */
5257 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5258 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5259 {
73fbfcad 5260 if (dump_enabled_p ())
78c60e3d
SS
5261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5262 "type conversion to/from bit-precision "
e645e942 5263 "unsupported.\n");
7b7b1813
RG
5264 return false;
5265 }
5266
ebfd146a
IR
5267 if (!vec_stmt) /* transformation not required. */
5268 {
5269 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
adac3a68 5270 DUMP_VECT_SCOPE ("vectorizable_assignment");
68435eb2 5271 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5272 return true;
5273 }
5274
67b8dbac 5275 /* Transform. */
73fbfcad 5276 if (dump_enabled_p ())
e645e942 5277 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5278
5279 /* Handle def. */
5280 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5281
5282 /* Handle use. */
f18b55bd 5283 for (j = 0; j < ncopies; j++)
ebfd146a 5284 {
f18b55bd
IR
5285 /* Handle uses. */
5286 if (j == 0)
86a91c0a 5287 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
f18b55bd 5288 else
e4057a39 5289 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
f18b55bd
IR
5290
 5291	      /* Arguments are ready.  Create the new vector stmt.  */
e1bd7296 5292 stmt_vec_info new_stmt_info = NULL;
9771b263 5293 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5294 {
7b7ec6c5
RG
5295 if (CONVERT_EXPR_CODE_P (code)
5296 || code == VIEW_CONVERT_EXPR)
4a73490d 5297 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
e1bd7296 5298 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
f18b55bd
IR
5299 new_temp = make_ssa_name (vec_dest, new_stmt);
5300 gimple_assign_set_lhs (new_stmt, new_temp);
86a91c0a
RS
5301 new_stmt_info
5302 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
f18b55bd 5303 if (slp_node)
e1bd7296 5304 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
f18b55bd 5305 }
ebfd146a
IR
5306
5307 if (slp_node)
f18b55bd
IR
5308 continue;
5309
5310 if (j == 0)
e1bd7296 5311 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
f18b55bd 5312 else
e1bd7296 5313 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
f18b55bd 5314
e1bd7296 5315 prev_stmt_info = new_stmt_info;
f18b55bd 5316 }
b8698a0f 5317
9771b263 5318 vec_oprnds.release ();
ebfd146a
IR
5319 return true;
5320}
5321
9dc3f7de 5322
1107f3ae
IR
5323/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5324 either as shift by a scalar or by a vector. */
5325
5326bool
5327vect_supportable_shift (enum tree_code code, tree scalar_type)
5328{
5329
ef4bddc2 5330 machine_mode vec_mode;
1107f3ae
IR
5331 optab optab;
5332 int icode;
5333 tree vectype;
5334
5335 vectype = get_vectype_for_scalar_type (scalar_type);
5336 if (!vectype)
5337 return false;
5338
5339 optab = optab_for_tree_code (code, vectype, optab_scalar);
5340 if (!optab
5341 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5342 {
5343 optab = optab_for_tree_code (code, vectype, optab_vector);
5344 if (!optab
5345 || (optab_handler (optab, TYPE_MODE (vectype))
5346 == CODE_FOR_nothing))
5347 return false;
5348 }
5349
5350 vec_mode = TYPE_MODE (vectype);
5351 icode = (int) optab_handler (optab, vec_mode);
5352 if (icode == CODE_FOR_nothing)
5353 return false;
5354
5355 return true;
5356}
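/* For example (a hypothetical use), a caller could test

     if (vect_supportable_shift (RSHIFT_EXPR, short_integer_type_node))

   before rewriting a scalar computation into a shift that the vectorizer
   must later implement for the corresponding vector mode.  */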
5357
5358
9dc3f7de
IR
5359/* Function vectorizable_shift.
5360
32e8e429
RS
5361 Check if STMT_INFO performs a shift operation that can be vectorized.
5362 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5363 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5364 Return true if STMT_INFO is vectorizable in this way. */
9dc3f7de
IR
5365
5366static bool
32e8e429 5367vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
1eede195 5368 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5369 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5370{
5371 tree vec_dest;
5372 tree scalar_dest;
5373 tree op0, op1 = NULL;
5374 tree vec_oprnd1 = NULL_TREE;
9dc3f7de
IR
5375 tree vectype;
5376 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5377 enum tree_code code;
ef4bddc2 5378 machine_mode vec_mode;
9dc3f7de
IR
5379 tree new_temp;
5380 optab optab;
5381 int icode;
ef4bddc2 5382 machine_mode optab_op2_mode;
9dc3f7de 5383 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5384 int ndts = 2;
9dc3f7de 5385 stmt_vec_info prev_stmt_info;
928686b1
RS
5386 poly_uint64 nunits_in;
5387 poly_uint64 nunits_out;
9dc3f7de 5388 tree vectype_out;
cede2577 5389 tree op1_vectype;
9dc3f7de
IR
5390 int ncopies;
5391 int j, i;
6e1aa848
DN
5392 vec<tree> vec_oprnds0 = vNULL;
5393 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5394 tree vop0, vop1;
5395 unsigned int k;
49eab32e 5396 bool scalar_shift_arg = true;
9dc3f7de 5397 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5398 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5399
5400 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5401 return false;
5402
66c16fd9
RB
5403 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5404 && ! vec_stmt)
9dc3f7de
IR
5405 return false;
5406
5407 /* Is STMT a vectorizable binary/unary operation? */
32e8e429
RS
5408 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5409 if (!stmt)
9dc3f7de
IR
5410 return false;
5411
5412 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5413 return false;
5414
5415 code = gimple_assign_rhs_code (stmt);
5416
5417 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5418 || code == RROTATE_EXPR))
5419 return false;
5420
5421 scalar_dest = gimple_assign_lhs (stmt);
5422 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5423 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5424 {
73fbfcad 5425 if (dump_enabled_p ())
78c60e3d 5426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5427 "bit-precision shifts not supported.\n");
7b7b1813
RG
5428 return false;
5429 }
9dc3f7de
IR
5430
5431 op0 = gimple_assign_rhs1 (stmt);
894dd753 5432 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
9dc3f7de 5433 {
73fbfcad 5434 if (dump_enabled_p ())
78c60e3d 5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5436 "use not simple.\n");
9dc3f7de
IR
5437 return false;
5438 }
5439 /* If op0 is an external or constant def use a vector type with
5440 the same size as the output vector type. */
5441 if (!vectype)
5442 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5443 if (vec_stmt)
5444 gcc_assert (vectype);
5445 if (!vectype)
5446 {
73fbfcad 5447 if (dump_enabled_p ())
78c60e3d 5448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5449 "no vectype for scalar type\n");
9dc3f7de
IR
5450 return false;
5451 }
5452
5453 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5454 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5455 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5456 return false;
5457
5458 op1 = gimple_assign_rhs2 (stmt);
fef96d8e
RS
5459 stmt_vec_info op1_def_stmt_info;
5460 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5461 &op1_def_stmt_info))
9dc3f7de 5462 {
73fbfcad 5463 if (dump_enabled_p ())
78c60e3d 5464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5465 "use not simple.\n");
9dc3f7de
IR
5466 return false;
5467 }
5468
9dc3f7de
IR
5469 /* Multiple types in SLP are handled by creating the appropriate number of
5470 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5471 case of SLP. */
fce57248 5472 if (slp_node)
9dc3f7de
IR
5473 ncopies = 1;
5474 else
e8f142e2 5475 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5476
5477 gcc_assert (ncopies >= 1);
5478
 5479	  /* Determine whether the shift amount is a vector or a scalar.  If the
5480 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5481
dbfa87aa
YR
5482 if ((dt[1] == vect_internal_def
5483 || dt[1] == vect_induction_def)
5484 && !slp_node)
49eab32e
JJ
5485 scalar_shift_arg = false;
5486 else if (dt[1] == vect_constant_def
5487 || dt[1] == vect_external_def
5488 || dt[1] == vect_internal_def)
5489 {
 5490	      /* In SLP, we need to check whether the shift count is the same
 5491	         for all scalar stmts; in loops, if it is a constant or invariant,
 5492	         it is always a scalar shift.  */
5493 if (slp_node)
5494 {
b9787581
RS
5495 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5496 stmt_vec_info slpstmt_info;
49eab32e 5497
b9787581
RS
5498 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5499 {
5500 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5501 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5502 scalar_shift_arg = false;
5503 }
49eab32e 5504 }
60d393e8
RB
5505
5506 /* If the shift amount is computed by a pattern stmt we cannot
5507 use the scalar amount directly thus give up and use a vector
5508 shift. */
fef96d8e
RS
5509 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5510 scalar_shift_arg = false;
49eab32e
JJ
5511 }
5512 else
5513 {
73fbfcad 5514 if (dump_enabled_p ())
78c60e3d 5515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5516 "operand mode requires invariant argument.\n");
49eab32e
JJ
5517 return false;
5518 }
5519
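  /* As an illustration: in a[i] = b[i] << c[i] the shift amount varies
     per element and a vector/vector shift is required, while in
     a[i] = b[i] << k the amount is invariant and the vector/scalar
     form is preferred (a hypothetical example).  */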
9dc3f7de 5520 /* Vector shifted by vector. */
49eab32e 5521 if (!scalar_shift_arg)
9dc3f7de
IR
5522 {
5523 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5524 if (dump_enabled_p ())
78c60e3d 5525 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5526 "vector/vector shift/rotate found.\n");
78c60e3d 5527
aa948027
JJ
5528 if (!op1_vectype)
5529 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5530 if (op1_vectype == NULL_TREE
5531 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5532 {
73fbfcad 5533 if (dump_enabled_p ())
78c60e3d
SS
5534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5535 "unusable type for last operand in"
e645e942 5536 " vector/vector shift/rotate.\n");
cede2577
JJ
5537 return false;
5538 }
9dc3f7de
IR
5539 }
5540 /* See if the machine has a vector shifted by scalar insn and if not
5541 then see if it has a vector shifted by vector insn. */
49eab32e 5542 else
9dc3f7de
IR
5543 {
5544 optab = optab_for_tree_code (code, vectype, optab_scalar);
5545 if (optab
5546 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5547 {
73fbfcad 5548 if (dump_enabled_p ())
78c60e3d 5549 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5550 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5551 }
5552 else
5553 {
5554 optab = optab_for_tree_code (code, vectype, optab_vector);
5555 if (optab
5556 && (optab_handler (optab, TYPE_MODE (vectype))
5557 != CODE_FOR_nothing))
5558 {
49eab32e
JJ
5559 scalar_shift_arg = false;
5560
73fbfcad 5561 if (dump_enabled_p ())
78c60e3d 5562 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5563 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5564
5565 /* Unlike the other binary operators, shifts/rotates have
5566 the rhs being int, instead of the same type as the lhs,
5567 so make sure the scalar is the right type if we are
aa948027 5568 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5569 if (dt[1] == vect_constant_def)
5570 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5571 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5572 TREE_TYPE (op1)))
5573 {
5574 if (slp_node
5575 && TYPE_MODE (TREE_TYPE (vectype))
5576 != TYPE_MODE (TREE_TYPE (op1)))
5577 {
73fbfcad 5578 if (dump_enabled_p ())
78c60e3d
SS
5579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5580 "unusable type for last operand in"
e645e942 5581 " vector/vector shift/rotate.\n");
21c0a521 5582 return false;
aa948027
JJ
5583 }
5584 if (vec_stmt && !slp_node)
5585 {
5586 op1 = fold_convert (TREE_TYPE (vectype), op1);
86a91c0a 5587 op1 = vect_init_vector (stmt_info, op1,
aa948027
JJ
5588 TREE_TYPE (vectype), NULL);
5589 }
5590 }
9dc3f7de
IR
5591 }
5592 }
5593 }
9dc3f7de
IR
5594
5595 /* Supportable by target? */
5596 if (!optab)
5597 {
73fbfcad 5598 if (dump_enabled_p ())
78c60e3d 5599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5600 "no optab.\n");
9dc3f7de
IR
5601 return false;
5602 }
5603 vec_mode = TYPE_MODE (vectype);
5604 icode = (int) optab_handler (optab, vec_mode);
5605 if (icode == CODE_FOR_nothing)
5606 {
73fbfcad 5607 if (dump_enabled_p ())
78c60e3d 5608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5609 "op not supported by target.\n");
9dc3f7de 5610 /* Check only during analysis. */
cf098191 5611 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5612 || (!vec_stmt
5613 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5614 return false;
73fbfcad 5615 if (dump_enabled_p ())
e645e942
TJ
5616 dump_printf_loc (MSG_NOTE, vect_location,
5617 "proceeding using word mode.\n");
9dc3f7de
IR
5618 }
5619
5620 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5621 if (!vec_stmt
5622 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5623 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5624 {
73fbfcad 5625 if (dump_enabled_p ())
78c60e3d 5626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5627 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5628 return false;
5629 }
5630
5631 if (!vec_stmt) /* transformation not required. */
5632 {
5633 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
adac3a68 5634 DUMP_VECT_SCOPE ("vectorizable_shift");
68435eb2 5635 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5636 return true;
5637 }
5638
67b8dbac 5639 /* Transform. */
9dc3f7de 5640
73fbfcad 5641 if (dump_enabled_p ())
78c60e3d 5642 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5643 "transform binary/unary operation.\n");
9dc3f7de
IR
5644
5645 /* Handle def. */
5646 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5647
9dc3f7de
IR
5648 prev_stmt_info = NULL;
5649 for (j = 0; j < ncopies; j++)
5650 {
5651 /* Handle uses. */
5652 if (j == 0)
5653 {
5654 if (scalar_shift_arg)
5655 {
5656 /* Vector shl and shr insn patterns can be defined with scalar
5657 operand 2 (shift operand). In this case, use constant or loop
5658 invariant op1 directly, without extending it to vector mode
5659 first. */
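	      /* E.g., in a loop like a[i] = b[i] << k (a hypothetical
		 example) the invariant k can feed the insn's scalar shift
		 operand directly for every copy.  */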
5660 optab_op2_mode = insn_data[icode].operand[2].mode;
5661 if (!VECTOR_MODE_P (optab_op2_mode))
5662 {
73fbfcad 5663 if (dump_enabled_p ())
78c60e3d 5664 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5665 "operand 1 using scalar mode.\n");
9dc3f7de 5666 vec_oprnd1 = op1;
8930f723 5667 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5668 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5669 if (slp_node)
5670 {
5671 /* Store vec_oprnd1 for every vector stmt to be created
5672 for SLP_NODE. We check during the analysis that all
5673 the shift arguments are the same.
5674 TODO: Allow different constants for different vector
5675 stmts generated for an SLP instance. */
5676 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5677 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5678 }
5679 }
5680 }
5681
5682 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5683 (a special case for certain kind of vector shifts); otherwise,
5684 operand 1 should be of a vector type (the usual case). */
5685 if (vec_oprnd1)
86a91c0a
RS
5686 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5687 slp_node);
9dc3f7de 5688 else
86a91c0a
RS
5689 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5690 slp_node);
9dc3f7de
IR
5691 }
5692 else
e4057a39 5693 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
9dc3f7de
IR
5694
5695 /* Arguments are ready. Create the new vector stmt. */
e1bd7296 5696 stmt_vec_info new_stmt_info = NULL;
9771b263 5697 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5698 {
9771b263 5699 vop1 = vec_oprnds1[i];
e1bd7296 5700 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5701 new_temp = make_ssa_name (vec_dest, new_stmt);
5702 gimple_assign_set_lhs (new_stmt, new_temp);
86a91c0a
RS
5703 new_stmt_info
5704 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9dc3f7de 5705 if (slp_node)
e1bd7296 5706 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9dc3f7de
IR
5707 }
5708
5709 if (slp_node)
5710 continue;
5711
5712 if (j == 0)
e1bd7296 5713 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9dc3f7de 5714 else
e1bd7296
RS
5715 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5716 prev_stmt_info = new_stmt_info;
9dc3f7de
IR
5717 }
5718
9771b263
DN
5719 vec_oprnds0.release ();
5720 vec_oprnds1.release ();
9dc3f7de
IR
5721
5722 return true;
5723}
5724
5725
ebfd146a
IR
5726/* Function vectorizable_operation.
5727
32e8e429 5728 Check if STMT_INFO performs a binary, unary or ternary operation that can
16949072 5729 be vectorized.
32e8e429
RS
5730 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5731 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5732 Return true if STMT_INFO is vectorizable in this way. */
ebfd146a
IR
5733
5734static bool
32e8e429 5735vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
1eede195 5736 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5737 stmt_vector_for_cost *cost_vec)
ebfd146a 5738{
00f07b86 5739 tree vec_dest;
ebfd146a 5740 tree scalar_dest;
16949072 5741 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
00f07b86 5742 tree vectype;
ebfd146a 5743 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5744 enum tree_code code, orig_code;
ef4bddc2 5745 machine_mode vec_mode;
ebfd146a
IR
5746 tree new_temp;
5747 int op_type;
00f07b86 5748 optab optab;
523ba738 5749 bool target_support_p;
16949072
RG
5750 enum vect_def_type dt[3]
5751 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5752 int ndts = 3;
ebfd146a 5753 stmt_vec_info prev_stmt_info;
928686b1
RS
5754 poly_uint64 nunits_in;
5755 poly_uint64 nunits_out;
ebfd146a
IR
5756 tree vectype_out;
5757 int ncopies;
5758 int j, i;
6e1aa848
DN
5759 vec<tree> vec_oprnds0 = vNULL;
5760 vec<tree> vec_oprnds1 = vNULL;
5761 vec<tree> vec_oprnds2 = vNULL;
16949072 5762 tree vop0, vop1, vop2;
a70d6342 5763 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5764 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5765
a70d6342 5766 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5767 return false;
5768
66c16fd9
RB
5769 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5770 && ! vec_stmt)
ebfd146a
IR
5771 return false;
5772
5773 /* Is STMT a vectorizable binary/unary operation? */
32e8e429
RS
5774 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5775 if (!stmt)
ebfd146a
IR
5776 return false;
5777
5778 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5779 return false;
5780
0eb952ea 5781 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5782
1af4ebf5
MG
5783 /* For pointer addition and subtraction, we should use the normal
5784 plus and minus for the vector operation. */
ebfd146a
IR
5785 if (code == POINTER_PLUS_EXPR)
5786 code = PLUS_EXPR;
1af4ebf5
MG
5787 if (code == POINTER_DIFF_EXPR)
5788 code = MINUS_EXPR;
ebfd146a
IR
5789
5790 /* Support only unary or binary operations. */
5791 op_type = TREE_CODE_LENGTH (code);
16949072 5792 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5793 {
73fbfcad 5794 if (dump_enabled_p ())
78c60e3d 5795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5796 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5797 op_type);
ebfd146a
IR
5798 return false;
5799 }
5800
b690cc0f
RG
5801 scalar_dest = gimple_assign_lhs (stmt);
5802 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5803
7b7b1813
RG
5804 /* Most operations cannot handle bit-precision types without extra
5805 truncations. */
045c1278 5806 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5807 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
 5808	      /* The exceptions are bitwise binary operations.  */
5809 && code != BIT_IOR_EXPR
5810 && code != BIT_XOR_EXPR
5811 && code != BIT_AND_EXPR)
5812 {
73fbfcad 5813 if (dump_enabled_p ())
78c60e3d 5814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5815 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5816 return false;
5817 }
5818
ebfd146a 5819 op0 = gimple_assign_rhs1 (stmt);
894dd753 5820 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
ebfd146a 5821 {
73fbfcad 5822 if (dump_enabled_p ())
78c60e3d 5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5824 "use not simple.\n");
ebfd146a
IR
5825 return false;
5826 }
b690cc0f
RG
5827 /* If op0 is an external or constant def use a vector type with
5828 the same size as the output vector type. */
5829 if (!vectype)
b036c6c5
IE
5830 {
 5831	      /* For a boolean type we cannot determine the vectype from an
 5832	         invariant value (we don't know whether it is a vector
 5833	         of booleans or a vector of integers).  We use the output
 5834	         vectype because operations on booleans don't change the
 5835	         type.  */
2568d8a1 5836 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5837 {
2568d8a1 5838 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5839 {
5840 if (dump_enabled_p ())
5841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5842 "not supported operation on bool value.\n");
5843 return false;
5844 }
5845 vectype = vectype_out;
5846 }
5847 else
5848 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5849 }
7d8930a0
IR
5850 if (vec_stmt)
5851 gcc_assert (vectype);
5852 if (!vectype)
5853 {
73fbfcad 5854 if (dump_enabled_p ())
3c2a8ed0
DM
5855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5856 "no vectype for scalar type %T\n",
5857 TREE_TYPE (op0));
7d8930a0
IR
5858
5859 return false;
5860 }
b690cc0f
RG
5861
5862 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5863 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5864 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5865 return false;
ebfd146a 5866
16949072 5867 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5868 {
5869 op1 = gimple_assign_rhs2 (stmt);
894dd753 5870 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
ebfd146a 5871 {
73fbfcad 5872 if (dump_enabled_p ())
78c60e3d 5873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5874 "use not simple.\n");
ebfd146a
IR
5875 return false;
5876 }
5877 }
16949072
RG
5878 if (op_type == ternary_op)
5879 {
5880 op2 = gimple_assign_rhs3 (stmt);
894dd753 5881 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
16949072 5882 {
73fbfcad 5883 if (dump_enabled_p ())
78c60e3d 5884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5885 "use not simple.\n");
16949072
RG
5886 return false;
5887 }
5888 }
ebfd146a 5889
b690cc0f 5890 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5891 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5892 case of SLP. */
fce57248 5893 if (slp_node)
b690cc0f
RG
5894 ncopies = 1;
5895 else
e8f142e2 5896 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5897
5898 gcc_assert (ncopies >= 1);
5899
9dc3f7de 5900 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5901 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5902 || code == RROTATE_EXPR)
9dc3f7de 5903 return false;
ebfd146a 5904
ebfd146a 5905 /* Supportable by target? */
00f07b86
RH
5906
5907 vec_mode = TYPE_MODE (vectype);
5908 if (code == MULT_HIGHPART_EXPR)
523ba738 5909 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5910 else
5911 {
5912 optab = optab_for_tree_code (code, vectype, optab_default);
5913 if (!optab)
5deb57cb 5914 {
73fbfcad 5915 if (dump_enabled_p ())
78c60e3d 5916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5917 "no optab.\n");
00f07b86 5918 return false;
5deb57cb 5919 }
523ba738
RS
5920 target_support_p = (optab_handler (optab, vec_mode)
5921 != CODE_FOR_nothing);
5deb57cb
JJ
5922 }
5923
523ba738 5924 if (!target_support_p)
ebfd146a 5925 {
73fbfcad 5926 if (dump_enabled_p ())
78c60e3d 5927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5928 "op not supported by target.\n");
ebfd146a 5929 /* Check only during analysis. */
cf098191 5930 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5931 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5932 return false;
73fbfcad 5933 if (dump_enabled_p ())
e645e942
TJ
5934 dump_printf_loc (MSG_NOTE, vect_location,
5935 "proceeding using word mode.\n");
383d9c83
IR
5936 }
5937
4a00c761 5938 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5939 if (!VECTOR_MODE_P (vec_mode)
5940 && !vec_stmt
ca09abcb 5941 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5942 {
73fbfcad 5943 if (dump_enabled_p ())
78c60e3d 5944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5945 "not worthwhile without SIMD support.\n");
e34842c6 5946 return false;
7d8930a0 5947 }
ebfd146a 5948
ebfd146a
IR
5949 if (!vec_stmt) /* transformation not required. */
5950 {
4a00c761 5951 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
adac3a68 5952 DUMP_VECT_SCOPE ("vectorizable_operation");
68435eb2 5953 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5954 return true;
5955 }
5956
67b8dbac 5957 /* Transform. */
ebfd146a 5958
73fbfcad 5959 if (dump_enabled_p ())
78c60e3d 5960 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5961 "transform binary/unary operation.\n");
383d9c83 5962
0eb952ea
JJ
5963 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5964 vectors with unsigned elements, but the result is signed. So, we
 5965	     need to compute the MINUS_EXPR into a vectype temporary and
5966 VIEW_CONVERT_EXPR it into the final vectype_out result. */
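  /* A sketch of the emitted sequence (SSA names hypothetical):

       vect_tmp = vect_p1 - vect_p2;                      unsigned VECTYPE
       vect_res = VIEW_CONVERT_EXPR<vectype_out>(vect_tmp);

     rather than a single signed subtraction.  */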
5967 tree vec_cvt_dest = NULL_TREE;
5968 if (orig_code == POINTER_DIFF_EXPR)
7b76867b
RB
5969 {
5970 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5971 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5972 }
5973 /* Handle def. */
5974 else
5975 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
0eb952ea 5976
ebfd146a
IR
5977 /* In case the vectorization factor (VF) is bigger than the number
5978 of elements that we can fit in a vectype (nunits), we have to generate
 5979	     more than one vector stmt - i.e., we need to "unroll" the
4a00c761
JJ
5980 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5981 from one copy of the vector stmt to the next, in the field
5982 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5983 stages to find the correct vector defs to be used when vectorizing
5984 stmts that use the defs of the current stmt. The example below
5985 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5986 we need to create 4 vectorized stmts):
5987
5988 before vectorization:
5989 RELATED_STMT VEC_STMT
5990 S1: x = memref - -
5991 S2: z = x + 1 - -
5992
5993 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5994 there):
5995 RELATED_STMT VEC_STMT
5996 VS1_0: vx0 = memref0 VS1_1 -
5997 VS1_1: vx1 = memref1 VS1_2 -
5998 VS1_2: vx2 = memref2 VS1_3 -
5999 VS1_3: vx3 = memref3 - -
6000 S1: x = load - VS1_0
6001 S2: z = x + 1 - -
6002
6003 step2: vectorize stmt S2 (done here):
6004 To vectorize stmt S2 we first need to find the relevant vector
6005 def for the first operand 'x'. This is, as usual, obtained from
6006 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6007 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6008 relevant vector def 'vx0'. Having found 'vx0' we can generate
6009 the vector stmt VS2_0, and as usual, record it in the
6010 STMT_VINFO_VEC_STMT of stmt S2.
6011 When creating the second copy (VS2_1), we obtain the relevant vector
6012 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6013 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6014 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6015 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6016 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6017 chain of stmts and pointers:
6018 RELATED_STMT VEC_STMT
6019 VS1_0: vx0 = memref0 VS1_1 -
6020 VS1_1: vx1 = memref1 VS1_2 -
6021 VS1_2: vx2 = memref2 VS1_3 -
6022 VS1_3: vx3 = memref3 - -
6023 S1: x = load - VS1_0
6024 VS2_0: vz0 = vx0 + v1 VS2_1 -
6025 VS2_1: vz1 = vx1 + v1 VS2_2 -
6026 VS2_2: vz2 = vx2 + v1 VS2_3 -
6027 VS2_3: vz3 = vx3 + v1 - -
6028 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6029
6030 prev_stmt_info = NULL;
6031 for (j = 0; j < ncopies; j++)
6032 {
6033 /* Handle uses. */
6034 if (j == 0)
4a00c761 6035 {
d6476f90 6036 if (op_type == binary_op)
86a91c0a 6037 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
306b0c92 6038 slp_node);
d6476f90
RB
6039 else if (op_type == ternary_op)
6040 {
6041 if (slp_node)
6042 {
6043 auto_vec<tree> ops(3);
6044 ops.quick_push (op0);
6045 ops.quick_push (op1);
6046 ops.quick_push (op2);
6047 auto_vec<vec<tree> > vec_defs(3);
6048 vect_get_slp_defs (ops, slp_node, &vec_defs);
6049 vec_oprnds0 = vec_defs[0];
6050 vec_oprnds1 = vec_defs[1];
6051 vec_oprnds2 = vec_defs[2];
6052 }
6053 else
6054 {
86a91c0a
RS
6055 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6056 &vec_oprnds1, NULL);
6057 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6058 NULL, NULL);
d6476f90
RB
6059 }
6060 }
4a00c761 6061 else
86a91c0a 6062 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
306b0c92 6063 slp_node);
4a00c761 6064 }
ebfd146a 6065 else
4a00c761 6066 {
e4057a39 6067 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
4a00c761
JJ
6068 if (op_type == ternary_op)
6069 {
9771b263 6070 tree vec_oprnd = vec_oprnds2.pop ();
e4057a39 6071 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
9771b263 6072 vec_oprnd));
4a00c761
JJ
6073 }
6074 }
6075
6076 /* Arguments are ready. Create the new vector stmt. */
e1bd7296 6077 stmt_vec_info new_stmt_info = NULL;
9771b263 6078 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6079 {
4a00c761 6080 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6081 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6082 vop2 = ((op_type == ternary_op)
9771b263 6083 ? vec_oprnds2[i] : NULL_TREE);
e1bd7296
RS
6084 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6085 vop0, vop1, vop2);
4a00c761
JJ
6086 new_temp = make_ssa_name (vec_dest, new_stmt);
6087 gimple_assign_set_lhs (new_stmt, new_temp);
86a91c0a
RS
6088 new_stmt_info
6089 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
0eb952ea
JJ
6090 if (vec_cvt_dest)
6091 {
6092 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
e1bd7296
RS
6093 gassign *new_stmt
6094 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6095 new_temp);
0eb952ea
JJ
6096 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6097 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 6098 new_stmt_info
86a91c0a 6099 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
0eb952ea 6100 }
4a00c761 6101 if (slp_node)
e1bd7296 6102 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
ebfd146a
IR
6103 }
6104
4a00c761
JJ
6105 if (slp_node)
6106 continue;
6107
6108 if (j == 0)
e1bd7296 6109 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4a00c761 6110 else
e1bd7296
RS
6111 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6112 prev_stmt_info = new_stmt_info;
ebfd146a
IR
6113 }
6114
9771b263
DN
6115 vec_oprnds0.release ();
6116 vec_oprnds1.release ();
6117 vec_oprnds2.release ();
ebfd146a 6118
ebfd146a
IR
6119 return true;
6120}
6121
89fa689a 6122/* A helper function to ensure data reference DR_INFO's base alignment. */
c716e67f
XDL
6123
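/* For example, assuming DR_INFO's base is a file-scope "int a[256]" with
   the default 4-byte alignment while DR_TARGET_ALIGNMENT asks for 16
   bytes: the code below raises the declared alignment of "a" to 128 bits
   (via the symbol table for decls in it, or SET_DECL_ALIGN otherwise)
   so that aligned vector accesses can be generated.  */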
6124static void
89fa689a 6125ensure_base_align (dr_vec_info *dr_info)
c716e67f 6126{
89fa689a 6127 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
c716e67f
XDL
6128 return;
6129
89fa689a 6130 if (dr_info->base_misaligned)
c716e67f 6131 {
89fa689a 6132 tree base_decl = dr_info->base_decl;
c716e67f 6133
89fa689a
RS
6134 unsigned int align_base_to
6135 = DR_TARGET_ALIGNMENT (dr_info) * BITS_PER_UNIT;
f702e7d4 6136
428f0c67 6137 if (decl_in_symtab_p (base_decl))
f702e7d4 6138 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6139 else
6140 {
f702e7d4 6141 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6142 DECL_USER_ALIGN (base_decl) = 1;
6143 }
89fa689a 6144 dr_info->base_misaligned = false;
c716e67f
XDL
6145 }
6146}
6147
ebfd146a 6148
44fc7854
BE
6149/* Function get_group_alias_ptr_type.
6150
32e8e429 6151 Return the alias type for the group starting at FIRST_STMT_INFO. */
44fc7854
BE
6152
6153static tree
32e8e429 6154get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
44fc7854
BE
6155{
6156 struct data_reference *first_dr, *next_dr;
44fc7854 6157
91987857
RS
6158 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6159 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
bffb8014 6160 while (next_stmt_info)
44fc7854 6161 {
bffb8014 6162 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
44fc7854
BE
6163 if (get_alias_set (DR_REF (first_dr))
6164 != get_alias_set (DR_REF (next_dr)))
6165 {
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE, vect_location,
6168 "conflicting alias set types.\n");
6169 return ptr_type_node;
6170 }
bffb8014 6171 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
44fc7854
BE
6172 }
6173 return reference_alias_ptr_type (DR_REF (first_dr));
6174}
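/* For example, assuming a group that interleaves stores to an "int" field
   and a "float" field of the same structure: the two DR_REFs have
   different alias sets, so the function above conservatively returns
   ptr_type_node instead of either field's alias pointer type.  */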
6175
6176
ebfd146a
IR
6177/* Function vectorizable_store.
6178
32e8e429
RS
6179 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6180 that can be vectorized.
6181 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6182 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6183 Return true if STMT_INFO is vectorizable in this way. */
ebfd146a
IR
6184
6185static bool
32e8e429 6186vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
1eede195
RS
6187 stmt_vec_info *vec_stmt, slp_tree slp_node,
6188 stmt_vector_for_cost *cost_vec)
ebfd146a 6189{
ebfd146a
IR
6190 tree data_ref;
6191 tree op;
6192 tree vec_oprnd = NULL_TREE;
272c6793 6193 tree elem_type;
ebfd146a 6194 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6195 struct loop *loop = NULL;
ef4bddc2 6196 machine_mode vec_mode;
ebfd146a
IR
6197 tree dummy;
6198 enum dr_alignment_support alignment_support_scheme;
929b4411
RS
6199 enum vect_def_type rhs_dt = vect_unknown_def_type;
6200 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6201 stmt_vec_info prev_stmt_info = NULL;
6202 tree dataref_ptr = NULL_TREE;
74bf76ed 6203 tree dataref_offset = NULL_TREE;
355fe088 6204 gimple *ptr_incr = NULL;
ebfd146a
IR
6205 int ncopies;
6206 int j;
bffb8014 6207 stmt_vec_info first_stmt_info;
2de001ee 6208 bool grouped_store;
ebfd146a 6209 unsigned int group_size, i;
6e1aa848
DN
6210 vec<tree> oprnds = vNULL;
6211 vec<tree> result_chain = vNULL;
09dfa495 6212 tree offset = NULL_TREE;
6e1aa848 6213 vec<tree> vec_oprnds = vNULL;
ebfd146a 6214 bool slp = (slp_node != NULL);
ebfd146a 6215 unsigned int vec_num;
a70d6342 6216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6217 vec_info *vinfo = stmt_info->vinfo;
272c6793 6218 tree aggr_type;
134c85ca 6219 gather_scatter_info gs_info;
d9f21f6a 6220 poly_uint64 vf;
2de001ee 6221 vec_load_store_type vls_type;
44fc7854 6222 tree ref_type;
a70d6342 6223
a70d6342 6224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6225 return false;
6226
66c16fd9
RB
6227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6228 && ! vec_stmt)
ebfd146a
IR
6229 return false;
6230
6231 /* Is vectorizable store? */
6232
c3a8f964 6233 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
86a91c0a 6234 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
c3a8f964 6235 {
beb456c3 6236 tree scalar_dest = gimple_assign_lhs (assign);
c3a8f964
RS
6237 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6238 && is_pattern_stmt_p (stmt_info))
6239 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6240 if (TREE_CODE (scalar_dest) != ARRAY_REF
6241 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6242 && TREE_CODE (scalar_dest) != INDIRECT_REF
6243 && TREE_CODE (scalar_dest) != COMPONENT_REF
6244 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6245 && TREE_CODE (scalar_dest) != REALPART_EXPR
6246 && TREE_CODE (scalar_dest) != MEM_REF)
6247 return false;
6248 }
6249 else
6250 {
86a91c0a 6251 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
f307441a
RS
6252 if (!call || !gimple_call_internal_p (call))
6253 return false;
6254
6255 internal_fn ifn = gimple_call_internal_fn (call);
6256 if (!internal_store_fn_p (ifn))
c3a8f964 6257 return false;
ebfd146a 6258
c3a8f964
RS
6259 if (slp_node != NULL)
6260 {
6261 if (dump_enabled_p ())
6262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6263 "SLP of masked stores not supported.\n");
6264 return false;
6265 }
6266
f307441a
RS
6267 int mask_index = internal_fn_mask_index (ifn);
6268 if (mask_index >= 0)
6269 {
6270 mask = gimple_call_arg (call, mask_index);
86a91c0a 6271 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
929b4411 6272 &mask_vectype))
f307441a
RS
6273 return false;
6274 }
c3a8f964
RS
6275 }
6276
86a91c0a 6277 op = vect_get_store_rhs (stmt_info);
ebfd146a 6278
fce57248
RS
6279 /* Cannot have hybrid store SLP -- that would mean storing to the
6280 same location twice. */
6281 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6282
f4d09712 6283 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6284 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6285
6286 if (loop_vinfo)
b17dc4d4
RB
6287 {
6288 loop = LOOP_VINFO_LOOP (loop_vinfo);
6289 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6290 }
6291 else
6292 vf = 1;
465c8c19
JJ
6293
6294 /* Multiple types in SLP are handled by creating the appropriate number of
6295 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6296 case of SLP. */
fce57248 6297 if (slp)
465c8c19
JJ
6298 ncopies = 1;
6299 else
e8f142e2 6300 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6301
6302 gcc_assert (ncopies >= 1);
6303
6304 /* FORNOW. This restriction should be relaxed. */
86a91c0a 6305 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
465c8c19
JJ
6306 {
6307 if (dump_enabled_p ())
6308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6309 "multiple types in nested loop.\n");
6310 return false;
6311 }
6312
86a91c0a 6313 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6314 return false;
6315
272c6793 6316 elem_type = TREE_TYPE (vectype);
ebfd146a 6317 vec_mode = TYPE_MODE (vectype);
7b7b1813 6318
ebfd146a
IR
6319 if (!STMT_VINFO_DATA_REF (stmt_info))
6320 return false;
6321
2de001ee 6322 vect_memory_access_type memory_access_type;
86a91c0a 6323 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6324 &memory_access_type, &gs_info))
6325 return false;
3bab6342 6326
c3a8f964
RS
6327 if (mask)
6328 {
7e11fc7f
RS
6329 if (memory_access_type == VMAT_CONTIGUOUS)
6330 {
6331 if (!VECTOR_MODE_P (vec_mode)
6332 || !can_vec_mask_load_store_p (vec_mode,
6333 TYPE_MODE (mask_vectype), false))
6334 return false;
6335 }
f307441a
RS
6336 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6337 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6338 {
6339 if (dump_enabled_p ())
6340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6341 "unsupported access type for masked store.\n");
6342 return false;
6343 }
c3a8f964
RS
6344 }
6345 else
6346 {
6347 /* FORNOW. In some cases we can vectorize even if the data-type is not
6348 supported (e.g. array initialization with 0). */
6349 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6350 return false;
6351 }
6352
89fa689a 6353 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
f307441a 6354 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6355 && memory_access_type != VMAT_GATHER_SCATTER
6356 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6357 if (grouped_store)
6358 {
bffb8014 6359 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
89fa689a 6360 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
bffb8014 6361 group_size = DR_GROUP_SIZE (first_stmt_info);
7cfb4d93
RS
6362 }
6363 else
6364 {
bffb8014 6365 first_stmt_info = stmt_info;
89fa689a 6366 first_dr_info = dr_info;
7cfb4d93
RS
6367 group_size = vec_num = 1;
6368 }
6369
ebfd146a
IR
6370 if (!vec_stmt) /* transformation not required. */
6371 {
2de001ee 6372 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6373
6374 if (loop_vinfo
6375 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6376 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6377 memory_access_type, &gs_info);
7cfb4d93 6378
ebfd146a 6379 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6380 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6381 vls_type, slp_node, cost_vec);
ebfd146a
IR
6382 return true;
6383 }
2de001ee 6384 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6385
67b8dbac 6386 /* Transform. */
ebfd146a 6387
89fa689a 6388 ensure_base_align (dr_info);
c716e67f 6389
f307441a 6390 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6391 {
c3a8f964 6392 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6393 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6394 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6395 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6396 edge pe = loop_preheader_edge (loop);
6397 gimple_seq seq;
6398 basic_block new_bb;
6399 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6400 poly_uint64 scatter_off_nunits
6401 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6402
4d694b27 6403 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6404 modifier = NONE;
4d694b27 6405 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6406 {
3bab6342
AT
6407 modifier = WIDEN;
6408
4d694b27
RS
6409 /* Currently gathers and scatters are only supported for
6410 fixed-length vectors. */
6411 unsigned int count = scatter_off_nunits.to_constant ();
6412 vec_perm_builder sel (count, count, 1);
6413 for (i = 0; i < (unsigned int) count; ++i)
6414 sel.quick_push (i | (count / 2));
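	  /* E.g. assuming COUNT == 8, this builds the selector
	     { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the high half of the
	     offset vector, which the odd-numbered copies below use.  */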
3bab6342 6415
4d694b27 6416 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6417 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6418 indices);
3bab6342
AT
6419 gcc_assert (perm_mask != NULL_TREE);
6420 }
4d694b27 6421 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6422 {
3bab6342
AT
6423 modifier = NARROW;
6424
4d694b27
RS
6425 /* Currently gathers and scatters are only supported for
6426 fixed-length vectors. */
6427 unsigned int count = nunits.to_constant ();
6428 vec_perm_builder sel (count, count, 1);
6429 for (i = 0; i < (unsigned int) count; ++i)
6430 sel.quick_push (i | (count / 2));
3bab6342 6431
4d694b27 6432 vec_perm_indices indices (sel, 2, count);
e3342de4 6433 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6434 gcc_assert (perm_mask != NULL_TREE);
6435 ncopies *= 2;
6436 }
6437 else
6438 gcc_unreachable ();
6439
134c85ca 6440 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6441 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6442 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6443 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6444 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6445 scaletype = TREE_VALUE (arglist);
6446
6447 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6448 && TREE_CODE (rettype) == VOID_TYPE);
6449
134c85ca 6450 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6451 if (!is_gimple_min_invariant (ptr))
6452 {
6453 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6454 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6455 gcc_assert (!new_bb);
6456 }
6457
6458 /* Currently we support only unconditional scatter stores,
6459 so mask should be all ones. */
6460 mask = build_int_cst (masktype, -1);
86a91c0a 6461 mask = vect_init_vector (stmt_info, mask, masktype, NULL);
3bab6342 6462
134c85ca 6463 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6464
6465 prev_stmt_info = NULL;
6466 for (j = 0; j < ncopies; ++j)
6467 {
6468 if (j == 0)
6469 {
6470 src = vec_oprnd1
86a91c0a 6471 = vect_get_vec_def_for_operand (op, stmt_info);
3bab6342 6472 op = vec_oprnd0
86a91c0a 6473 = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
3bab6342
AT
6474 }
6475 else if (modifier != NONE && (j & 1))
6476 {
6477 if (modifier == WIDEN)
6478 {
6479 src = vec_oprnd1
e4057a39 6480 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3bab6342 6481 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
86a91c0a 6482 stmt_info, gsi);
3bab6342
AT
6483 }
6484 else if (modifier == NARROW)
6485 {
6486 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
86a91c0a 6487 stmt_info, gsi);
3bab6342 6488 op = vec_oprnd0
e4057a39 6489 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3bab6342
AT
6490 }
6491 else
6492 gcc_unreachable ();
6493 }
6494 else
6495 {
6496 src = vec_oprnd1
e4057a39 6497 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3bab6342 6498 op = vec_oprnd0
e4057a39 6499 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3bab6342
AT
6500 }
6501
6502 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6503 {
928686b1
RS
6504 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6505 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6506 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342 6507 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
e1bd7296
RS
6508 gassign *new_stmt
6509 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
86a91c0a 6510 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3bab6342
AT
6511 src = var;
6512 }
6513
6514 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6515 {
928686b1
RS
6516 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6517 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6518 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342 6519 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
e1bd7296
RS
6520 gassign *new_stmt
6521 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
86a91c0a 6522 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3bab6342
AT
6523 op = var;
6524 }
6525
e1bd7296 6526 gcall *new_stmt
134c85ca 6527 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
e1bd7296 6528 stmt_vec_info new_stmt_info
86a91c0a 6529 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3bab6342 6530
ddf98a96 6531 if (prev_stmt_info == NULL)
e1bd7296 6532 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3bab6342 6533 else
e1bd7296
RS
6534 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6535 prev_stmt_info = new_stmt_info;
3bab6342
AT
6536 }
6537 return true;
6538 }
6539
f307441a 6540 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
bffb8014 6541 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
ebfd146a 6542
f307441a
RS
6543 if (grouped_store)
6544 {
ebfd146a 6545 /* FORNOW */
86a91c0a 6546 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
ebfd146a
IR
6547
6548 /* We vectorize all the stmts of the interleaving group when we
6549 reach the last stmt in the group. */
bffb8014
RS
6550 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6551 < DR_GROUP_SIZE (first_stmt_info)
ebfd146a
IR
6552 && !slp)
6553 {
6554 *vec_stmt = NULL;
6555 return true;
6556 }
6557
6558 if (slp)
4b5caab7 6559 {
0d0293ac 6560 grouped_store = false;
4b5caab7
IR
6561 /* VEC_NUM is the number of vect stmts to be created for this
6562 group. */
6563 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
bffb8014
RS
6564 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6565 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6566 == first_stmt_info);
89fa689a 6567 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
bffb8014 6568 op = vect_get_store_rhs (first_stmt_info);
4b5caab7 6569 }
ebfd146a 6570 else
4b5caab7
IR
6571 /* VEC_NUM is the number of vect stmts to be created for this
6572 group. */
ebfd146a 6573 vec_num = group_size;
44fc7854 6574
bffb8014 6575 ref_type = get_group_alias_ptr_type (first_stmt_info);
ebfd146a 6576 }
b8698a0f 6577 else
89fa689a 6578 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
b8698a0f 6579
73fbfcad 6580 if (dump_enabled_p ())
78c60e3d 6581 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6582 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6583
2de001ee
RS
6584 if (memory_access_type == VMAT_ELEMENTWISE
6585 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6586 {
6587 gimple_stmt_iterator incr_gsi;
6588 bool insert_after;
355fe088 6589 gimple *incr;
f2e2a985
MM
6590 tree offvar;
6591 tree ivstep;
6592 tree running_off;
f2e2a985
MM
6593 tree stride_base, stride_step, alias_off;
6594 tree vec_oprnd;
f502d50e 6595 unsigned int g;
4d694b27
RS
6596 /* Checked by get_load_store_type. */
6597 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6598
7cfb4d93 6599 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
86a91c0a 6600 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
f2e2a985
MM
6601
6602 stride_base
6603 = fold_build_pointer_plus
89fa689a 6604 (DR_BASE_ADDRESS (first_dr_info->dr),
f2e2a985 6605 size_binop (PLUS_EXPR,
89fa689a
RS
6606 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6607 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6608 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
f2e2a985
MM
6609
6610 /* For a store with loop-invariant (but other than power-of-2)
6611 stride (i.e. not a grouped access) like so:
6612
6613 for (i = 0; i < n; i += stride)
6614 array[i] = ...;
6615
6616 we generate a new induction variable and new stores from
6617 the components of the (vectorized) rhs:
6618
6619 for (j = 0; ; j += VF*stride)
6620 vectemp = ...;
6621 tmp1 = vectemp[0];
6622 array[j] = tmp1;
6623 tmp2 = vectemp[1];
6624 array[j + stride] = tmp2;
6625 ...
6626 */
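   /* Concretely, assuming nunits == 4 and a runtime stride S, each copy
      extracts vectemp[0..3] and stores them to array[j], array[j + S],
      array[j + 2*S] and array[j + 3*S]; the induction variable then
      advances by VF * S.  */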
6627
4d694b27 6628 unsigned nstores = const_nunits;
b17dc4d4 6629 unsigned lnel = 1;
cee62fee 6630 tree ltype = elem_type;
04199738 6631 tree lvectype = vectype;
cee62fee
MM
6632 if (slp)
6633 {
4d694b27
RS
6634 if (group_size < const_nunits
6635 && const_nunits % group_size == 0)
b17dc4d4 6636 {
4d694b27 6637 nstores = const_nunits / group_size;
b17dc4d4
RB
6638 lnel = group_size;
6639 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6640 lvectype = vectype;
6641
6642 /* First check if vec_extract optab doesn't support extraction
6643 of vector elts directly. */
b397965c 6644 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6645 machine_mode vmode;
6646 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6647 || !VECTOR_MODE_P (vmode)
414fef4e 6648 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6649 || (convert_optab_handler (vec_extract_optab,
6650 TYPE_MODE (vectype), vmode)
6651 == CODE_FOR_nothing))
6652 {
6653 /* Try to avoid emitting an extract of vector elements
6654 by performing the extracts using an integer type of the
6655 same size, extracting from a vector of those and then
6656 re-interpreting it as the original vector type if
6657 supported. */
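	      /* E.g. assuming a group of two "short" elements from a V8HI
		 vector and no direct V2HI vec_extract support: lsize is 32,
		 so the pieces are extracted as SImode elements from a V4SI
		 view of the same bits and stored as 32-bit integers.  */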
6658 unsigned lsize
6659 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6660 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6661 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6662 /* If we can't construct such a vector fall back to
6663 element extracts from the original vector type and
6664 element size stores. */
4d694b27 6665 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6666 && VECTOR_MODE_P (vmode)
414fef4e 6667 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6668 && (convert_optab_handler (vec_extract_optab,
6669 vmode, elmode)
6670 != CODE_FOR_nothing))
6671 {
4d694b27 6672 nstores = lnunits;
04199738
RB
6673 lnel = group_size;
6674 ltype = build_nonstandard_integer_type (lsize, 1);
6675 lvectype = build_vector_type (ltype, nstores);
6676 }
6677 /* Else fall back to vector extraction anyway.
6678 Fewer stores are more important than avoiding spilling
6679 of the vector we extract from. Compared to the
6680 construction case in vectorizable_load no store-forwarding
6681 issue exists here for reasonable archs. */
6682 }
b17dc4d4 6683 }
4d694b27
RS
6684 else if (group_size >= const_nunits
6685 && group_size % const_nunits == 0)
b17dc4d4
RB
6686 {
6687 nstores = 1;
4d694b27 6688 lnel = const_nunits;
b17dc4d4 6689 ltype = vectype;
04199738 6690 lvectype = vectype;
b17dc4d4 6691 }
cee62fee
MM
6692 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6693 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6694 }
6695
f2e2a985
MM
6696 ivstep = stride_step;
6697 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6698 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6699
6700 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6701
b210f45f
RB
6702 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6703 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6704 create_iv (stride_base, ivstep, NULL,
6705 loop, &incr_gsi, insert_after,
6706 &offvar, NULL);
6707 incr = gsi_stmt (incr_gsi);
4fbeb363 6708 loop_vinfo->add_stmt (incr);
f2e2a985 6709
b210f45f 6710 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6711
6712 prev_stmt_info = NULL;
44fc7854 6713 alias_off = build_int_cst (ref_type, 0);
bffb8014 6714 stmt_vec_info next_stmt_info = first_stmt_info;
f502d50e 6715 for (g = 0; g < group_size; g++)
f2e2a985 6716 {
f502d50e
MM
6717 running_off = offvar;
6718 if (g)
f2e2a985 6719 {
f502d50e
MM
6720 tree size = TYPE_SIZE_UNIT (ltype);
6721 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6722 size);
f502d50e 6723 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6724 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6725 running_off, pos);
86a91c0a 6726 vect_finish_stmt_generation (stmt_info, incr, gsi);
f2e2a985 6727 running_off = newoff;
f502d50e 6728 }
b17dc4d4
RB
6729 unsigned int group_el = 0;
6730 unsigned HOST_WIDE_INT
6731 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6732 for (j = 0; j < ncopies; j++)
6733 {
c3a8f964 6734 /* We've set op and rhs_dt above, from vect_get_store_rhs,
bffb8014 6735 and first_stmt_info == stmt_info. */
f502d50e
MM
6736 if (j == 0)
6737 {
6738 if (slp)
6739 {
86a91c0a
RS
6740 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6741 &vec_oprnds, NULL, slp_node);
f502d50e
MM
6742 vec_oprnd = vec_oprnds[0];
6743 }
6744 else
6745 {
bffb8014
RS
6746 op = vect_get_store_rhs (next_stmt_info);
6747 vec_oprnd = vect_get_vec_def_for_operand
6748 (op, next_stmt_info);
f502d50e
MM
6749 }
6750 }
f2e2a985 6751 else
f502d50e
MM
6752 {
6753 if (slp)
6754 vec_oprnd = vec_oprnds[j];
6755 else
e4057a39
RS
6756 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6757 vec_oprnd);
f502d50e 6758 }
04199738
RB
6759 /* Pun the vector to extract from if necessary. */
6760 if (lvectype != vectype)
6761 {
6762 tree tem = make_ssa_name (lvectype);
6763 gimple *pun
6764 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6765 lvectype, vec_oprnd));
86a91c0a 6766 vect_finish_stmt_generation (stmt_info, pun, gsi);
04199738
RB
6767 vec_oprnd = tem;
6768 }
f502d50e
MM
6769 for (i = 0; i < nstores; i++)
6770 {
6771 tree newref, newoff;
355fe088 6772 gimple *incr, *assign;
f502d50e
MM
6773 tree size = TYPE_SIZE (ltype);
6774 /* Extract the i'th component. */
6775 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6776 bitsize_int (i), size);
6777 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6778 size, pos);
6779
6780 elem = force_gimple_operand_gsi (gsi, elem, true,
6781 NULL_TREE, true,
6782 GSI_SAME_STMT);
6783
b17dc4d4
RB
6784 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6785 group_el * elsz);
f502d50e 6786 newref = build2 (MEM_REF, ltype,
b17dc4d4 6787 running_off, this_off);
89fa689a 6788 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
f502d50e
MM
6789
6790 /* And store it to *running_off. */
6791 assign = gimple_build_assign (newref, elem);
e1bd7296 6792 stmt_vec_info assign_info
86a91c0a 6793 = vect_finish_stmt_generation (stmt_info, assign, gsi);
f502d50e 6794
b17dc4d4
RB
6795 group_el += lnel;
6796 if (! slp
6797 || group_el == group_size)
6798 {
6799 newoff = copy_ssa_name (running_off, NULL);
6800 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6801 running_off, stride_step);
86a91c0a 6802 vect_finish_stmt_generation (stmt_info, incr, gsi);
f502d50e 6803
b17dc4d4
RB
6804 running_off = newoff;
6805 group_el = 0;
6806 }
225ce44b
RB
6807 if (g == group_size - 1
6808 && !slp)
f502d50e
MM
6809 {
6810 if (j == 0 && i == 0)
225ce44b 6811 STMT_VINFO_VEC_STMT (stmt_info)
e1bd7296 6812 = *vec_stmt = assign_info;
f502d50e 6813 else
e1bd7296
RS
6814 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6815 prev_stmt_info = assign_info;
f502d50e
MM
6816 }
6817 }
f2e2a985 6818 }
bffb8014 6819 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
b17dc4d4
RB
6820 if (slp)
6821 break;
f2e2a985 6822 }
778dd3b6
RB
6823
6824 vec_oprnds.release ();
f2e2a985
MM
6825 return true;
6826 }
6827
8c681247 6828 auto_vec<tree> dr_chain (group_size);
9771b263 6829 oprnds.create (group_size);
ebfd146a 6830
89fa689a
RS
6831 alignment_support_scheme
6832 = vect_supportable_dr_alignment (first_dr_info, false);
ebfd146a 6833 gcc_assert (alignment_support_scheme);
70088b95
RS
6834 vec_loop_masks *loop_masks
6835 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6836 ? &LOOP_VINFO_MASKS (loop_vinfo)
6837 : NULL);
272c6793 6838 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6839 realignment. vect_supportable_dr_alignment always returns either
6840 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6841 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6842 && !mask
70088b95 6843 && !loop_masks)
272c6793
RS
6844 || alignment_support_scheme == dr_aligned
6845 || alignment_support_scheme == dr_unaligned_supported);
6846
62da9e14
RS
6847 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6848 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6849 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6850
f307441a
RS
6851 tree bump;
6852 tree vec_offset = NULL_TREE;
6853 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6854 {
6855 aggr_type = NULL_TREE;
6856 bump = NULL_TREE;
6857 }
6858 else if (memory_access_type == VMAT_GATHER_SCATTER)
6859 {
6860 aggr_type = elem_type;
86a91c0a 6861 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
f307441a
RS
6862 &bump, &vec_offset);
6863 }
272c6793 6864 else
f307441a
RS
6865 {
6866 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6867 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6868 else
6869 aggr_type = vectype;
89fa689a
RS
6870 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
6871 memory_access_type);
f307441a 6872 }
ebfd146a 6873
c3a8f964
RS
6874 if (mask)
6875 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6876
ebfd146a
IR
6877 /* In case the vectorization factor (VF) is bigger than the number
6878 of elements that we can fit in a vectype (nunits), we have to generate
6879 more than one vector stmt - i.e. - we need to "unroll" the
b8698a0f 6880 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6881 vect_get_vec_def_for_stmt_copy. */
6882
0d0293ac 6883 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6884
6885 S1: &base + 2 = x2
6886 S2: &base = x0
6887 S3: &base + 1 = x1
6888 S4: &base + 3 = x3
6889
6890 We create vectorized stores starting from base address (the access of the
6891 first stmt in the chain (S2 in the above example), when the last store stmt
6892 of the chain (S4) is reached:
6893
6894 VS1: &base = vx2
6895 VS2: &base + vec_size*1 = vx0
6896 VS3: &base + vec_size*2 = vx1
6897 VS4: &base + vec_size*3 = vx3
6898
6899 Then permutation statements are generated:
6900
3fcc1b55
JJ
6901 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6902 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6903 ...
b8698a0f 6904
ebfd146a
IR
6905 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6906 (the order of the data-refs in the output of vect_permute_store_chain
6907 corresponds to the order of scalar stmts in the interleaving chain - see
6908 the documentation of vect_permute_store_chain()).
6909
6910 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6911 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6912 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6913 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6914 */
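   /* In the VS5/VS6 masks above (shown for two 8-element vectors),
      indices 0-7 select from the first input and 8-15 from the second,
      so the two permutes interleave vx0 and vx3 element by element.  */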
6915
6916 prev_stmt_info = NULL;
c3a8f964 6917 tree vec_mask = NULL_TREE;
ebfd146a
IR
6918 for (j = 0; j < ncopies; j++)
6919 {
e1bd7296 6920 stmt_vec_info new_stmt_info;
ebfd146a
IR
6921 if (j == 0)
6922 {
6923 if (slp)
6924 {
6925 /* Get vectorized arguments for SLP_NODE. */
86a91c0a
RS
6926 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
6927 NULL, slp_node);
ebfd146a 6928
9771b263 6929 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6930 }
6931 else
6932 {
b8698a0f
L
6933 /* For interleaved stores we collect vectorized defs for all the
6934 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6935 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6936 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6937
2c53b149 6938 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6939 OPRNDS are of size 1. */
bffb8014 6940 stmt_vec_info next_stmt_info = first_stmt_info;
ebfd146a
IR
6941 for (i = 0; i < group_size; i++)
6942 {
b8698a0f 6943 /* Since gaps are not supported for interleaved stores,
2c53b149 6944 DR_GROUP_SIZE is the exact number of stmts in the chain.
bffb8014
RS
6945 Therefore, NEXT_STMT_INFO can't be NULL. In case
6946 that there is no interleaving, DR_GROUP_SIZE is 1,
6947 and only one iteration of the loop will be executed. */
6948 op = vect_get_store_rhs (next_stmt_info);
6949 vec_oprnd = vect_get_vec_def_for_operand
6950 (op, next_stmt_info);
9771b263
DN
6951 dr_chain.quick_push (vec_oprnd);
6952 oprnds.quick_push (vec_oprnd);
bffb8014 6953 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
ebfd146a 6954 }
c3a8f964 6955 if (mask)
86a91c0a 6956 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
c3a8f964 6957 mask_vectype);
ebfd146a
IR
6958 }
6959
6960 /* We should have caught mismatched types earlier. */
6961 gcc_assert (useless_type_conversion_p (vectype,
6962 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
6963 bool simd_lane_access_p
6964 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6965 if (simd_lane_access_p
89fa689a
RS
6966 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
6967 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
6968 && integer_zerop (DR_OFFSET (first_dr_info->dr))
6969 && integer_zerop (DR_INIT (first_dr_info->dr))
74bf76ed 6970 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 6971 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed 6972 {
89fa689a 6973 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
44fc7854 6974 dataref_offset = build_int_cst (ref_type, 0);
74bf76ed 6975 }
f307441a 6976 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2d4bca81
RS
6977 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
6978 &dataref_ptr, &vec_offset);
74bf76ed
JJ
6979 else
6980 dataref_ptr
bffb8014 6981 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
74bf76ed 6982 simd_lane_access_p ? loop : NULL,
09dfa495 6983 offset, &dummy, gsi, &ptr_incr,
2d4bca81 6984 simd_lane_access_p, NULL_TREE, bump);
ebfd146a 6985 }
b8698a0f 6986 else
ebfd146a 6987 {
b8698a0f
L
6988 /* For interleaved stores we created vectorized defs for all the
6989 defs stored in OPRNDS in the previous iteration (previous copy).
6990 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
6991 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6992 next copy.
2c53b149 6993 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
6994 OPRNDS are of size 1. */
6995 for (i = 0; i < group_size; i++)
6996 {
9771b263 6997 op = oprnds[i];
e4057a39 6998 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
9771b263
DN
6999 dr_chain[i] = vec_oprnd;
7000 oprnds[i] = vec_oprnd;
ebfd146a 7001 }
c3a8f964 7002 if (mask)
e4057a39 7003 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
74bf76ed
JJ
7004 if (dataref_offset)
7005 dataref_offset
f307441a
RS
7006 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7007 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
e4057a39 7008 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
74bf76ed 7009 else
86a91c0a
RS
7010 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7011 stmt_info, bump);
ebfd146a
IR
7012 }
7013
2de001ee 7014 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7015 {
272c6793 7016 tree vec_array;
267d3070 7017
3ba4ff41 7018 /* Get an array into which we can store the individual vectors. */
272c6793 7019 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7020
7021 /* Invalidate the current contents of VEC_ARRAY. This should
7022 become an RTL clobber too, which prevents the vector registers
7023 from being upward-exposed. */
86a91c0a 7024 vect_clobber_variable (stmt_info, gsi, vec_array);
3ba4ff41
RS
7025
7026 /* Store the individual vectors into the array. */
272c6793 7027 for (i = 0; i < vec_num; i++)
c2d7ab2a 7028 {
9771b263 7029 vec_oprnd = dr_chain[i];
86a91c0a 7030 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
267d3070 7031 }
b8698a0f 7032
7cfb4d93 7033 tree final_mask = NULL;
70088b95
RS
7034 if (loop_masks)
7035 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7036 vectype, j);
7cfb4d93
RS
7037 if (vec_mask)
7038 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7039 vec_mask, gsi);
7040
7e11fc7f 7041 gcall *call;
7cfb4d93 7042 if (final_mask)
7e11fc7f
RS
7043 {
7044 /* Emit:
7045 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7046 VEC_ARRAY). */
7047 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7048 tree alias_ptr = build_int_cst (ref_type, align);
7049 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7050 dataref_ptr, alias_ptr,
7cfb4d93 7051 final_mask, vec_array);
7e11fc7f
RS
7052 }
7053 else
7054 {
7055 /* Emit:
7056 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7057 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7058 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7059 vec_array);
7060 gimple_call_set_lhs (call, data_ref);
7061 }
a844293d 7062 gimple_call_set_nothrow (call, true);
86a91c0a 7063 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
3ba4ff41
RS
7064
7065 /* Record that VEC_ARRAY is now dead. */
86a91c0a 7066 vect_clobber_variable (stmt_info, gsi, vec_array);
272c6793
RS
7067 }
7068 else
7069 {
e1bd7296 7070 new_stmt_info = NULL;
0d0293ac 7071 if (grouped_store)
272c6793 7072 {
b6b9227d
JJ
7073 if (j == 0)
7074 result_chain.create (group_size);
272c6793 7075 /* Permute. */
86a91c0a 7076 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
272c6793
RS
7077 &result_chain);
7078 }
c2d7ab2a 7079
bffb8014 7080 stmt_vec_info next_stmt_info = first_stmt_info;
272c6793
RS
7081 for (i = 0; i < vec_num; i++)
7082 {
644ffefd 7083 unsigned align, misalign;
272c6793 7084
7cfb4d93 7085 tree final_mask = NULL_TREE;
70088b95
RS
7086 if (loop_masks)
7087 final_mask = vect_get_loop_mask (gsi, loop_masks,
7088 vec_num * ncopies,
7cfb4d93
RS
7089 vectype, vec_num * j + i);
7090 if (vec_mask)
7091 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7092 vec_mask, gsi);
7093
f307441a
RS
7094 if (memory_access_type == VMAT_GATHER_SCATTER)
7095 {
7096 tree scale = size_int (gs_info.scale);
7097 gcall *call;
70088b95 7098 if (loop_masks)
f307441a
RS
7099 call = gimple_build_call_internal
7100 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7101 scale, vec_oprnd, final_mask);
7102 else
7103 call = gimple_build_call_internal
7104 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7105 scale, vec_oprnd);
7106 gimple_call_set_nothrow (call, true);
e1bd7296 7107 new_stmt_info
86a91c0a 7108 = vect_finish_stmt_generation (stmt_info, call, gsi);
f307441a
RS
7109 break;
7110 }
7111
272c6793
RS
7112 if (i > 0)
7113 /* Bump the vector pointer. */
7114 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
86a91c0a 7115 stmt_info, bump);
272c6793
RS
7116
7117 if (slp)
9771b263 7118 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7119 else if (grouped_store)
7120 /* For grouped stores vectorized defs are interleaved in
272c6793 7121 vect_permute_store_chain(). */
9771b263 7122 vec_oprnd = result_chain[i];
272c6793 7123
89fa689a
RS
7124 align = DR_TARGET_ALIGNMENT (first_dr_info);
7125 if (aligned_access_p (first_dr_info))
644ffefd 7126 misalign = 0;
89fa689a 7127 else if (DR_MISALIGNMENT (first_dr_info) == -1)
272c6793 7128 {
89fa689a 7129 align = dr_alignment (vect_dr_behavior (first_dr_info));
52639a61 7130 misalign = 0;
272c6793
RS
7131 }
7132 else
89fa689a 7133 misalign = DR_MISALIGNMENT (first_dr_info);
aed93b23
RB
7134 if (dataref_offset == NULL_TREE
7135 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7136 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7137 misalign);
c2d7ab2a 7138
62da9e14 7139 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7140 {
7141 tree perm_mask = perm_mask_for_reverse (vectype);
86a91c0a
RS
7142 tree perm_dest = vect_create_destination_var
7143 (vect_get_store_rhs (stmt_info), vectype);
b731b390 7144 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7145
7146 /* Generate the permute statement. */
355fe088 7147 gimple *perm_stmt
0d0e4a03
JJ
7148 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7149 vec_oprnd, perm_mask);
86a91c0a 7150 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
09dfa495
BM
7151
7152 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7153 vec_oprnd = new_temp;
7154 }
7155
272c6793 7156 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7157 if (final_mask)
c3a8f964
RS
7158 {
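	      /* Emit:
		   MASK_STORE (DATAREF_PTR, ALIAS_PTR, FINAL_MASK, VEC_OPRND).  */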
7159 align = least_bit_hwi (misalign | align);
7160 tree ptr = build_int_cst (ref_type, align);
7161 gcall *call
7162 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7163 dataref_ptr, ptr,
7cfb4d93 7164 final_mask, vec_oprnd);
c3a8f964 7165 gimple_call_set_nothrow (call, true);
e1bd7296 7166 new_stmt_info
86a91c0a 7167 = vect_finish_stmt_generation (stmt_info, call, gsi);
c3a8f964
RS
7168 }
7169 else
7170 {
7171 data_ref = fold_build2 (MEM_REF, vectype,
7172 dataref_ptr,
7173 dataref_offset
7174 ? dataref_offset
7175 : build_int_cst (ref_type, 0));
89fa689a 7176 if (aligned_access_p (first_dr_info))
c3a8f964 7177 ;
89fa689a 7178 else if (DR_MISALIGNMENT (first_dr_info) == -1)
c3a8f964
RS
7179 TREE_TYPE (data_ref)
7180 = build_aligned_type (TREE_TYPE (data_ref),
7181 align * BITS_PER_UNIT);
7182 else
7183 TREE_TYPE (data_ref)
7184 = build_aligned_type (TREE_TYPE (data_ref),
7185 TYPE_ALIGN (elem_type));
89fa689a 7186 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
e1bd7296
RS
7187 gassign *new_stmt
7188 = gimple_build_assign (data_ref, vec_oprnd);
7189 new_stmt_info
86a91c0a 7190 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
c3a8f964 7191 }
272c6793
RS
7192
7193 if (slp)
7194 continue;
7195
bffb8014
RS
7196 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7197 if (!next_stmt_info)
272c6793
RS
7198 break;
7199 }
ebfd146a 7200 }
1da0876c
RS
7201 if (!slp)
7202 {
7203 if (j == 0)
e1bd7296 7204 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
1da0876c 7205 else
e1bd7296
RS
7206 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7207 prev_stmt_info = new_stmt_info;
1da0876c 7208 }
ebfd146a
IR
7209 }
7210
9771b263
DN
7211 oprnds.release ();
7212 result_chain.release ();
7213 vec_oprnds.release ();
ebfd146a
IR
7214
7215 return true;
7216}
7217
557be5a8
AL
7218/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7219 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7220 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7221 vect_gen_perm_mask_checked. */
a1e53f3f 7222
3fcc1b55 7223tree
4aae3cb3 7224vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7225{
b00cb3bf 7226 tree mask_type;
a1e53f3f 7227
0ecc2b7d
RS
7228 poly_uint64 nunits = sel.length ();
7229 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7230
7231 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7232 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7233}
7234
7ac7e286 7235/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7236 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7237
7238tree
4aae3cb3 7239vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7240{
7ac7e286 7241 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7242 return vect_gen_perm_mask_any (vectype, sel);
7243}
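/* A typical use, as elsewhere in this file: build the indices first and
   then request a checked mask, e.g. (sketch, assuming a constant element
   count NUNITS):

     vec_perm_builder sel (nunits, nunits, 1);
     for (i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   MASK is then used as the third operand of a VEC_PERM_EXPR.  */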
7244
aec7ae7d 7245/* Given vector variables X and Y that were generated for the scalar
82570274 7246 STMT_INFO, generate instructions to permute the vector elements of X and Y
aec7ae7d
JJ
7247 using permutation mask MASK_VEC, insert them at *GSI and return the
7248 permuted vector variable. */
a1e53f3f
L
7249
7250static tree
82570274 7251permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
aec7ae7d 7252 gimple_stmt_iterator *gsi)
a1e53f3f
L
7253{
7254 tree vectype = TREE_TYPE (x);
aec7ae7d 7255 tree perm_dest, data_ref;
355fe088 7256 gimple *perm_stmt;
a1e53f3f 7257
82570274 7258 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7ad429a4
RS
7259 if (TREE_CODE (scalar_dest) == SSA_NAME)
7260 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7261 else
7262 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7263 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7264
7265 /* Generate the permute statement. */
0d0e4a03 7266 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
82570274 7267 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
a1e53f3f
L
7268
7269 return data_ref;
7270}
7271
32e8e429 7272/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
6b916b36 7273 inserting them on the loop's preheader edge. Returns true if we
32e8e429 7274 were successful in doing so (and thus STMT_INFO can then be moved),
6b916b36
RB
7275 otherwise returns false. */
7276
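/* For example, assuming an invariant load "x = *p" whose address
   "p = &a + k" is computed by a statement inside LOOP: that defining
   statement is moved to the loop's preheader edge so the load itself
   can subsequently be hoisted as well.  */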
7277static bool
32e8e429 7278hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
6b916b36
RB
7279{
7280 ssa_op_iter i;
7281 tree op;
7282 bool any = false;
7283
32e8e429 7284 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
6b916b36 7285 {
355fe088 7286 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7287 if (!gimple_nop_p (def_stmt)
7288 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7289 {
7290 /* Make sure we don't need to recurse. While we could do
7291 so in simple cases when there are more complex use webs
7292 we don't have an easy way to preserve stmt order to fulfil
7293 dependencies within them. */
7294 tree op2;
7295 ssa_op_iter i2;
d1417442
JJ
7296 if (gimple_code (def_stmt) == GIMPLE_PHI)
7297 return false;
6b916b36
RB
7298 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7299 {
355fe088 7300 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7301 if (!gimple_nop_p (def_stmt2)
7302 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7303 return false;
7304 }
7305 any = true;
7306 }
7307 }
7308
7309 if (!any)
7310 return true;
7311
32e8e429 7312 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
6b916b36 7313 {
355fe088 7314 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7315 if (!gimple_nop_p (def_stmt)
7316 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7317 {
7318 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7319 gsi_remove (&gsi, false);
7320 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7321 }
7322 }
7323
7324 return true;
7325}
7326
ebfd146a
IR
7327/* vectorizable_load.
7328
32e8e429
RS
7329 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7330 that can be vectorized.
7331 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7332 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7333 Return true if STMT_INFO is vectorizable in this way. */
ebfd146a
IR
7334
7335static bool
32e8e429 7336vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
1eede195
RS
7337 stmt_vec_info *vec_stmt, slp_tree slp_node,
7338 slp_instance slp_node_instance,
68435eb2 7339 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7340{
7341 tree scalar_dest;
7342 tree vec_dest = NULL;
7343 tree data_ref = NULL;
b8698a0f 7344 stmt_vec_info prev_stmt_info;
ebfd146a 7345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7346 struct loop *loop = NULL;
32e8e429 7347 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
a70d6342 7348 bool nested_in_vect_loop = false;
272c6793 7349 tree elem_type;
ebfd146a 7350 tree new_temp;
ef4bddc2 7351 machine_mode mode;
ebfd146a
IR
7352 tree dummy;
7353 enum dr_alignment_support alignment_support_scheme;
7354 tree dataref_ptr = NULL_TREE;
74bf76ed 7355 tree dataref_offset = NULL_TREE;
355fe088 7356 gimple *ptr_incr = NULL;
ebfd146a 7357 int ncopies;
4d694b27
RS
7358 int i, j;
7359 unsigned int group_size;
7360 poly_uint64 group_gap_adj;
ebfd146a
IR
7361 tree msq = NULL_TREE, lsq;
7362 tree offset = NULL_TREE;
356bbc4c 7363 tree byte_offset = NULL_TREE;
ebfd146a 7364 tree realignment_token = NULL_TREE;
538dd0b7 7365 gphi *phi = NULL;
6e1aa848 7366 vec<tree> dr_chain = vNULL;
0d0293ac 7367 bool grouped_load = false;
bffb8014 7368 stmt_vec_info first_stmt_info;
b9787581 7369 stmt_vec_info first_stmt_info_for_drptr = NULL;
ebfd146a
IR
7370 bool compute_in_loop = false;
7371 struct loop *at_loop;
7372 int vec_num;
7373 bool slp = (slp_node != NULL);
7374 bool slp_perm = false;
a70d6342 7375 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7376 poly_uint64 vf;
272c6793 7377 tree aggr_type;
134c85ca 7378 gather_scatter_info gs_info;
310213d4 7379 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7380 tree ref_type;
929b4411 7381 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7382
465c8c19
JJ
7383 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7384 return false;
7385
66c16fd9
RB
7386 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7387 && ! vec_stmt)
465c8c19
JJ
7388 return false;
7389
c3a8f964 7390 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
86a91c0a 7391 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
c3a8f964 7392 {
beb456c3 7393 scalar_dest = gimple_assign_lhs (assign);
c3a8f964
RS
7394 if (TREE_CODE (scalar_dest) != SSA_NAME)
7395 return false;
465c8c19 7396
beb456c3 7397 tree_code code = gimple_assign_rhs_code (assign);
c3a8f964
RS
7398 if (code != ARRAY_REF
7399 && code != BIT_FIELD_REF
7400 && code != INDIRECT_REF
7401 && code != COMPONENT_REF
7402 && code != IMAGPART_EXPR
7403 && code != REALPART_EXPR
7404 && code != MEM_REF
7405 && TREE_CODE_CLASS (code) != tcc_declaration)
7406 return false;
7407 }
7408 else
7409 {
86a91c0a 7410 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
bfaa08b7
RS
7411 if (!call || !gimple_call_internal_p (call))
7412 return false;
7413
7414 internal_fn ifn = gimple_call_internal_fn (call);
7415 if (!internal_load_fn_p (ifn))
c3a8f964 7416 return false;
465c8c19 7417
c3a8f964
RS
7418 scalar_dest = gimple_call_lhs (call);
7419 if (!scalar_dest)
7420 return false;
7421
7422 if (slp_node != NULL)
7423 {
7424 if (dump_enabled_p ())
7425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7426 "SLP of masked loads not supported.\n");
7427 return false;
7428 }
7429
bfaa08b7
RS
7430 int mask_index = internal_fn_mask_index (ifn);
7431 if (mask_index >= 0)
7432 {
7433 mask = gimple_call_arg (call, mask_index);
86a91c0a 7434 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
929b4411 7435 &mask_vectype))
bfaa08b7
RS
7436 return false;
7437 }
c3a8f964 7438 }
465c8c19
JJ
7439
7440 if (!STMT_VINFO_DATA_REF (stmt_info))
7441 return false;
7442
7443 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7444 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7445
a70d6342
IR
7446 if (loop_vinfo)
7447 {
7448 loop = LOOP_VINFO_LOOP (loop_vinfo);
86a91c0a 7449 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
a70d6342
IR
7450 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7451 }
7452 else
3533e503 7453 vf = 1;
ebfd146a
IR
7454
7455 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7456 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7457 case of SLP. */
fce57248 7458 if (slp)
ebfd146a
IR
7459 ncopies = 1;
7460 else
e8f142e2 7461 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7462
7463 gcc_assert (ncopies >= 1);
7464
7465 /* FORNOW. This restriction should be relaxed. */
7466 if (nested_in_vect_loop && ncopies > 1)
7467 {
73fbfcad 7468 if (dump_enabled_p ())
78c60e3d 7469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7470 "multiple types in nested loop.\n");
ebfd146a
IR
7471 return false;
7472 }
7473
f2556b68
RB
7474 /* Invalidate assumptions made by dependence analysis when vectorization
7475 on the unrolled body effectively re-orders stmts. */
7476 if (ncopies > 1
7477 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7478 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7479 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7480 {
7481 if (dump_enabled_p ())
7482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7483 "cannot perform implicit CSE when unrolling "
7484 "with negative dependence distance\n");
7485 return false;
7486 }
7487
7b7b1813 7488 elem_type = TREE_TYPE (vectype);
947131ba 7489 mode = TYPE_MODE (vectype);
ebfd146a
IR
7490
7491 /* FORNOW. In some cases we can vectorize even if the data-type is not
7492 supported (e.g. data copies). */
947131ba 7493 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7494 {
73fbfcad 7495 if (dump_enabled_p ())
78c60e3d 7496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7497 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7498 return false;
7499 }
7500
ebfd146a 7501 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7502 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7503 {
0d0293ac 7504 grouped_load = true;
ebfd146a 7505 /* FORNOW */
2de001ee
RS
7506 gcc_assert (!nested_in_vect_loop);
7507 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7508
bffb8014
RS
7509 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7510 group_size = DR_GROUP_SIZE (first_stmt_info);
d5f035ea 7511
b1af7da6
RB
7512 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7513 slp_perm = true;
7514
f2556b68
RB
7515 /* Invalidate assumptions made by dependence analysis when vectorization
7516 on the unrolled body effectively re-orders stmts. */
7517 if (!PURE_SLP_STMT (stmt_info)
7518 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7519 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7520 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7521 {
7522 if (dump_enabled_p ())
7523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7524 "cannot perform implicit CSE when performing "
7525 "group loads with negative dependence distance\n");
7526 return false;
7527 }
96bb56b2
RB
7528
7529 /* Similarly, when the stmt is a load that is both part of an SLP
7530 instance and a loop-vectorized stmt via the same-dr mechanism,
7531 we have to give up. */
2c53b149 7532 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2 7533 && (STMT_SLP_TYPE (stmt_info)
c26228d4 7534 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
96bb56b2
RB
7535 {
7536 if (dump_enabled_p ())
7537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7538 "conflicting SLP types for CSEd load\n");
7539 return false;
7540 }
ebfd146a 7541 }
7cfb4d93
RS
7542 else
7543 group_size = 1;
ebfd146a 7544
2de001ee 7545 vect_memory_access_type memory_access_type;
86a91c0a 7546 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7547 &memory_access_type, &gs_info))
7548 return false;
a1e53f3f 7549
c3a8f964
RS
7550 if (mask)
7551 {
7552 if (memory_access_type == VMAT_CONTIGUOUS)
7553 {
7e11fc7f
RS
7554 machine_mode vec_mode = TYPE_MODE (vectype);
7555 if (!VECTOR_MODE_P (vec_mode)
7556 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7557 TYPE_MODE (mask_vectype), true))
7558 return false;
7559 }
bfaa08b7 7560 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7561 {
7562 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7563 tree masktype
7564 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7565 if (TREE_CODE (masktype) == INTEGER_TYPE)
7566 {
7567 if (dump_enabled_p ())
7568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7569 "masked gather with integer mask not"
7570 " supported.");
7571 return false;
7572 }
7573 }
bfaa08b7
RS
7574 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7575 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7576 {
7577 if (dump_enabled_p ())
7578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7579 "unsupported access type for masked load.\n");
7580 return false;
7581 }
7582 }
7583
ebfd146a
IR
7584 if (!vec_stmt) /* transformation not required. */
7585 {
2de001ee
RS
7586 if (!slp)
7587 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7588
7589 if (loop_vinfo
7590 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7591 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7592 memory_access_type, &gs_info);
7cfb4d93 7593
ebfd146a 7594 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7595 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7596 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7597 return true;
7598 }
7599
2de001ee
RS
7600 if (!slp)
7601 gcc_assert (memory_access_type
7602 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7603
73fbfcad 7604 if (dump_enabled_p ())
78c60e3d 7605 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7606 "transform load. ncopies = %d\n", ncopies);

  /* Transform.  */

  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
  ensure_base_align (dr_info);

  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
    {
      vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
      return true;
    }

  if (memory_access_type == VMAT_INVARIANT)
    {
      gcc_assert (!grouped_load && !mask && !bb_vinfo);
      /* If we have versioned for aliasing or the loop doesn't
	 have any data dependencies that would preclude this,
	 then we are sure this is a loop invariant load and
	 thus we can insert it on the preheader edge.  */
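      /* Illustrative sketch (not from the original sources): given

	   for (i = 0; i < n; i++)
	     ... = *p + 1;

	 with P loop-invariant, the scalar load *P is emitted once on the
	 preheader edge and only broadcast inside the loop; HOIST_P below
	 is false whenever a data dependence or an enclosing vectorized
	 outer loop makes that movement unsafe.  */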
      bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt_info, loop));
      if (hoist_p)
	{
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "hoisting out of the vectorized loop: %G", stmt);
	  scalar_dest = copy_ssa_name (scalar_dest);
	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
	  gsi_insert_on_edge_immediate
	    (loop_preheader_edge (loop),
	     gimple_build_assign (scalar_dest, rhs));
	}
      /* These copies are all equivalent, but currently the representation
	 requires a separate STMT_VINFO_VEC_STMT for each one.  */
      prev_stmt_info = NULL;
      gimple_stmt_iterator gsi2 = *gsi;
      gsi_next (&gsi2);
      for (j = 0; j < ncopies; j++)
	{
	  stmt_vec_info new_stmt_info;
	  if (hoist_p)
	    {
	      new_temp = vect_init_vector (stmt_info, scalar_dest,
					   vectype, NULL);
	      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
	      new_stmt_info = vinfo->add_stmt (new_stmt);
	    }
	  else
	    {
	      new_temp = vect_init_vector (stmt_info, scalar_dest,
					   vectype, &gsi2);
	      new_stmt_info = vinfo->lookup_def (new_temp);
	    }
	  if (slp)
	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	  else if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	  prev_stmt_info = new_stmt_info;
	}
      return true;
    }

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      tree stride_base, stride_step, alias_off;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();
      unsigned HOST_WIDE_INT cst_offset = 0;

      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop);

      if (grouped_load)
	{
	  first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
	  first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
	}
      else
	{
	  first_stmt_info = stmt_info;
	  first_dr_info = dr_info;
	}
      if (slp && grouped_load)
	{
	  group_size = DR_GROUP_SIZE (first_stmt_info);
	  ref_type = get_group_alias_ptr_type (first_stmt_info);
	}
      else
	{
	  if (grouped_load)
	    cst_offset
	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
		 * vect_get_place_in_interleaving_chain (stmt_info,
							 first_stmt_info));
	  group_size = 1;
	  ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
	}

      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr_info->dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
			 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));

      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
       */
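      /* Concrete instance (illustrative, not from the original sources):
	 with stride == 3 and VF == 4, one vector iteration loads
	 array[j], array[j + 3], array[j + 6] and array[j + 9], and the
	 induction variable J advances by 4*3 == 12 elements.  */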

      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
			    build_int_cst (TREE_TYPE (stride_step), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
      create_iv (stride_base, ivstep, NULL,
		 loop, &incr_gsi, insert_after,
		 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      loop_vinfo->add_stmt (incr);

      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (ref_type, 0);
      int nloads = const_nunits;
      int lnel = 1;
      tree ltype = TREE_TYPE (vectype);
      tree lvectype = vectype;
      auto_vec<tree> dr_chain;
      if (memory_access_type == VMAT_STRIDED_SLP)
	{
	  if (group_size < const_nunits)
	    {
	      /* First check if vec_init optab supports construction from
		 vector elts directly.  */
	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
	      machine_mode vmode;
	      if (mode_for_vector (elmode, group_size).exists (&vmode)
		  && VECTOR_MODE_P (vmode)
		  && targetm.vector_mode_supported_p (vmode)
		  && (convert_optab_handler (vec_init_optab,
					     TYPE_MODE (vectype), vmode)
		      != CODE_FOR_nothing))
		{
		  nloads = const_nunits / group_size;
		  lnel = group_size;
		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
		}
	      else
		{
		  /* Otherwise avoid emitting a constructor of vector elements
		     by performing the loads using an integer type of the same
		     size, constructing a vector of those and then
		     re-interpreting it as the original vector type.
		     This avoids a huge runtime penalty due to the general
		     inability to perform store forwarding from smaller stores
		     to a larger load.  */
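		  /* Illustrative example (an assumption, not from the
		     original sources): for a V8HI vectype and
		     GROUP_SIZE == 2 this path emits four 32-bit integer
		     loads, builds a V4SI vector from them and
		     VIEW_CONVERTs the result back to V8HI, instead of
		     constructing the vector from eight HI elements.  */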
		  unsigned lsize
		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
		  elmode = int_mode_for_size (lsize, 0).require ();
		  unsigned int lnunits = const_nunits / group_size;
		  /* If we can't construct such a vector fall back to
		     element loads of the original vector type.  */
		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
		      && VECTOR_MODE_P (vmode)
		      && targetm.vector_mode_supported_p (vmode)
		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
			  != CODE_FOR_nothing))
		    {
		      nloads = lnunits;
		      lnel = group_size;
		      ltype = build_nonstandard_integer_type (lsize, 1);
		      lvectype = build_vector_type (ltype, nloads);
		    }
		}
	    }
	  else
	    {
	      nloads = 1;
	      lnel = const_nunits;
	      ltype = vectype;
	    }
	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
	}
      /* If the vectype holds just one element, load the whole
	 vector(1) scalar_type at once.  */
      else if (nloads == 1)
	ltype = vectype;

      if (slp)
	{
	  /* For SLP permutation support we need to load the whole group,
	     not only the number of vector stmts the permutation result
	     fits in.  */
	  if (slp_perm)
	    {
	      /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF.  */
	      unsigned int const_vf = vf.to_constant ();
	      ncopies = CEIL (group_size * const_vf, const_nunits);
	      dr_chain.create (ncopies);
	    }
	  else
	    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	}
      unsigned int group_el = 0;
      unsigned HOST_WIDE_INT
	elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      for (j = 0; j < ncopies; j++)
	{
	  if (nloads > 1)
	    vec_alloc (v, nloads);
	  stmt_vec_info new_stmt_info = NULL;
	  for (i = 0; i < nloads; i++)
	    {
	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
					     group_el * elsz + cst_offset);
	      tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
	      vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
	      gassign *new_stmt
		= gimple_build_assign (make_ssa_name (ltype), data_ref);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
	      if (nloads > 1)
		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
					gimple_assign_lhs (new_stmt));

	      group_el += lnel;
	      if (! slp
		  || group_el == group_size)
		{
		  tree newoff = copy_ssa_name (running_off);
		  gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
						      running_off, stride_step);
		  vect_finish_stmt_generation (stmt_info, incr, gsi);

		  running_off = newoff;
		  group_el = 0;
		}
	    }
	  if (nloads > 1)
	    {
	      tree vec_inv = build_constructor (lvectype, v);
	      new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
	      new_stmt_info = vinfo->lookup_def (new_temp);
	      if (lvectype != vectype)
		{
		  gassign *new_stmt
		    = gimple_build_assign (make_ssa_name (vectype),
					   VIEW_CONVERT_EXPR,
					   build1 (VIEW_CONVERT_EXPR,
						   vectype, new_temp));
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		}
	    }

	  if (slp)
	    {
	      if (slp_perm)
		dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
	      else
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	      prev_stmt_info = new_stmt_info;
	    }
	}
      if (slp_perm)
	{
	  unsigned n_perms;
	  vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					slp_node_instance, false, &n_perms);
	}
      return true;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER
      || (!slp && memory_access_type == VMAT_CONTIGUOUS))
    grouped_load = false;

  if (grouped_load)
    {
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);
      /* For SLP vectorization we directly vectorize a subchain
	 without permutation.  */
      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
      /* For BB vectorization always use the first stmt to base
	 the data ref pointer on.  */
      if (bb_vinfo)
	first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (first_stmt_info)
	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
	     ??? But we can only do so if there is exactly one
	     as we have no way to get at the rest.  Leave the CSE
	     opportunity alone.
	     ??? With the group load eventually participating
	     in multiple different permutations (having multiple
	     slp nodes which refer to the same group) the CSE
	     is even wrong code.  See PR56270.  */
	  && !slp)
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
      first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
      group_gap_adj = 0;

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  grouped_load = false;
	  /* If an SLP permutation is from N elements to N elements,
	     and if one vector holds a whole number of N, we can load
	     the inputs to the permutation in the same way as an
	     unpermuted sequence.  In other cases we need to load the
	     whole group, not only the number of vector stmts the
	     permutation result fits in.  */
	  if (slp_perm
	      && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
		  || !multiple_p (nunits, group_size)))
	    {
	      /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
		 variable VF; see vect_transform_slp_perm_load.  */
	      unsigned int const_vf = vf.to_constant ();
	      unsigned int const_nunits = nunits.to_constant ();
	      vec_num = CEIL (group_size * const_vf, const_nunits);
	      group_gap_adj = vf * group_size - nunits * vec_num;
	    }
	  else
	    {
	      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      group_gap_adj
		= group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
	    }
	}
      else
	vec_num = group_size;

      ref_type = get_group_alias_ptr_type (first_stmt_info);
    }
  else
    {
      first_stmt_info = stmt_info;
      first_dr_info = dr_info;
      group_size = vec_num = 1;
      group_gap_adj = 0;
      ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
    }

  alignment_support_scheme
    = vect_supportable_dr_alignment (first_dr_info, false);
  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	       && !mask
	       && !loop_masks)
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt, i.e. we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to
     vectorize stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

       S1:  x2 = &base + 2
       S2:  x0 = &base
       S3:  x1 = &base + 1
       S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

       VS1: vx0 = &base
       VS2: vx1 = &base + vec_size*1
       VS3: vx3 = &base + vec_size*2
       VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

       VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
       VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load ().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
		      GET_MODE_SIZE (TYPE_MODE (vectype))))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
				    size_one_node);
	}
    }
  else
    at_loop = loop;

  if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  tree bump;
  tree vec_offset = NULL_TREE;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      aggr_type = elem_type;
      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
				       &bump, &vec_offset);
    }
  else
    {
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
      else
	aggr_type = vectype;
      bump = vect_get_data_ptr_increment (dr_info, aggr_type,
					  memory_access_type);
    }

  tree vec_mask = NULL_TREE;
  prev_stmt_info = NULL;
  poly_uint64 group_elt = 0;
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info = NULL;
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
	{
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr_info->dr))
	      && integer_zerop (DR_INIT (first_dr_info->dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (TREE_TYPE (ref_type)))
	      && (alignment_support_scheme == dr_aligned
		  || alignment_support_scheme == dr_unaligned_supported))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
	      dataref_offset = build_int_cst (ref_type, 0);
	    }
	  else if (first_stmt_info_for_drptr
		   && first_stmt_info != first_stmt_info_for_drptr)
	    {
	      dataref_ptr
		= vect_create_data_ref_ptr (first_stmt_info_for_drptr,
					    aggr_type, at_loop, offset, &dummy,
					    gsi, &ptr_incr, simd_lane_access_p,
					    byte_offset, bump);
	      /* Adjust the pointer by the difference to first_stmt.  */
	      data_reference_p ptrdr
		= STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
	      tree diff
		= fold_convert (sizetype,
				size_binop (MINUS_EXPR,
					    DR_INIT (first_dr_info->dr),
					    DR_INIT (ptrdr)));
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt_info, diff);
	    }
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
					 &dataref_ptr, &vec_offset);
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p,
					  byte_offset, bump);
	  if (mask)
	    vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
						     mask_vectype);
	}
      else
	{
	  if (dataref_offset)
	    dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
					      bump);
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
	  else
	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					   stmt_info, bump);
	  if (mask)
	    vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
	}

      if (grouped_load || slp_perm)
	dr_chain.create (vec_num);

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	{
	  tree vec_array;

	  vec_array = create_vector_array (vectype, vec_num);

	  tree final_mask = NULL_TREE;
	  if (loop_masks)
	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
					     vectype, j);
	  if (vec_mask)
	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						  vec_mask, gsi);

	  gcall *call;
	  if (final_mask)
	    {
	      /* Emit:
		   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
						VEC_MASK).  */
	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
	      tree alias_ptr = build_int_cst (ref_type, align);
	      call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
						 dataref_ptr, alias_ptr,
						 final_mask);
	    }
	  else
	    {
	      /* Emit:
		   VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
	      call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
	    }
	  gimple_call_set_lhs (call, vec_array);
	  gimple_call_set_nothrow (call, true);
	  new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);

	  /* Extract each vector into an SSA_NAME.  */
	  for (i = 0; i < vec_num; i++)
	    {
	      new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
					    vec_array, i);
	      dr_chain.quick_push (new_temp);
	    }

	  /* Record the mapping between SSA_NAMEs and statements.  */
	  vect_record_grouped_load_vectors (stmt_info, dr_chain);

	  /* Record that VEC_ARRAY is now dead.  */
	  vect_clobber_variable (stmt_info, gsi, vec_array);
	}
      else
	{
	  for (i = 0; i < vec_num; i++)
	    {
	      tree final_mask = NULL_TREE;
	      if (loop_masks
		  && memory_access_type != VMAT_INVARIANT)
		final_mask = vect_get_loop_mask (gsi, loop_masks,
						 vec_num * ncopies,
						 vectype, vec_num * j + i);
	      if (vec_mask)
		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						      vec_mask, gsi);

	      if (i > 0)
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt_info, bump);

	      /* 2. Create the vector-load in the loop.  */
	      gimple *new_stmt = NULL;
	      switch (alignment_support_scheme)
		{
		case dr_aligned:
		case dr_unaligned_supported:
		  {
		    unsigned int align, misalign;

		    if (memory_access_type == VMAT_GATHER_SCATTER)
		      {
			tree scale = size_int (gs_info.scale);
			gcall *call;
			if (loop_masks)
			  call = gimple_build_call_internal
			    (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
			     vec_offset, scale, final_mask);
			else
			  call = gimple_build_call_internal
			    (IFN_GATHER_LOAD, 3, dataref_ptr,
			     vec_offset, scale);
			gimple_call_set_nothrow (call, true);
			new_stmt = call;
			data_ref = NULL_TREE;
			break;
		      }

		    align = DR_TARGET_ALIGNMENT (dr_info);
		    if (alignment_support_scheme == dr_aligned)
		      {
			gcc_assert (aligned_access_p (first_dr_info));
			misalign = 0;
		      }
		    else if (DR_MISALIGNMENT (first_dr_info) == -1)
		      {
			align = dr_alignment
			  (vect_dr_behavior (first_dr_info));
			misalign = 0;
		      }
		    else
		      misalign = DR_MISALIGNMENT (first_dr_info);
		    if (dataref_offset == NULL_TREE
			&& TREE_CODE (dataref_ptr) == SSA_NAME)
		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
					      align, misalign);

		    if (final_mask)
		      {
			align = least_bit_hwi (misalign | align);
			tree ptr = build_int_cst (ref_type, align);
			gcall *call
			  = gimple_build_call_internal (IFN_MASK_LOAD, 3,
							dataref_ptr, ptr,
							final_mask);
			gimple_call_set_nothrow (call, true);
			new_stmt = call;
			data_ref = NULL_TREE;
		      }
		    else
		      {
			data_ref
			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
					 dataref_offset
					 ? dataref_offset
					 : build_int_cst (ref_type, 0));
			if (alignment_support_scheme == dr_aligned)
			  ;
			else if (DR_MISALIGNMENT (first_dr_info) == -1)
			  TREE_TYPE (data_ref)
			    = build_aligned_type (TREE_TYPE (data_ref),
						  align * BITS_PER_UNIT);
			else
			  TREE_TYPE (data_ref)
			    = build_aligned_type (TREE_TYPE (data_ref),
						  TYPE_ALIGN (elem_type));
		      }
		    break;
		  }
		case dr_explicit_realign:
		  {
		    tree ptr, bump;

		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

		    if (compute_in_loop)
		      msq = vect_setup_realignment (first_stmt_info, gsi,
						    &realignment_token,
						    dr_explicit_realign,
						    dataref_ptr, NULL);

		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      ptr = copy_ssa_name (dataref_ptr);
		    else
		      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr_info);
		    new_stmt = gimple_build_assign
		      (ptr, BIT_AND_EXPR, dataref_ptr,
		       build_int_cst
		       (TREE_TYPE (dataref_ptr),
			-(HOST_WIDE_INT) align));
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (ref_type, 0));
		    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
		    vec_dest = vect_create_destination_var (scalar_dest,
							    vectype);
		    new_stmt = gimple_build_assign (vec_dest, data_ref);
		    new_temp = make_ssa_name (vec_dest, new_stmt);
		    gimple_assign_set_lhs (new_stmt, new_temp);
		    gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
		    gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    msq = new_temp;

		    bump = size_binop (MULT_EXPR, vs,
				       TYPE_SIZE_UNIT (elem_type));
		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
					   stmt_info, bump);
		    new_stmt = gimple_build_assign
		      (NULL_TREE, BIT_AND_EXPR, ptr,
		       build_int_cst
		       (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
		    ptr = copy_ssa_name (ptr, new_stmt);
		    gimple_assign_set_lhs (new_stmt, ptr);
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, ptr,
				build_int_cst (ref_type, 0));
		    break;
		  }
		case dr_explicit_realign_optimized:
		  {
		    if (TREE_CODE (dataref_ptr) == SSA_NAME)
		      new_temp = copy_ssa_name (dataref_ptr);
		    else
		      new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
		    unsigned int align = DR_TARGET_ALIGNMENT (first_dr_info);
		    new_stmt = gimple_build_assign
		      (new_temp, BIT_AND_EXPR, dataref_ptr,
		       build_int_cst (TREE_TYPE (dataref_ptr),
				      -(HOST_WIDE_INT) align));
		    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
		    data_ref
		      = build2 (MEM_REF, vectype, new_temp,
				build_int_cst (ref_type, 0));
		    break;
		  }
		default:
		  gcc_unreachable ();
		}
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      /* DATA_REF is null if we've already built the statement.  */
	      if (data_ref)
		{
		  vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
		  new_stmt = gimple_build_assign (vec_dest, data_ref);
		}
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      new_stmt_info
		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

	      /* 3. Handle explicit realignment if necessary/supported.
		 Create in loop:
		   vec_dest = realign_load (msq, lsq, realignment_token)  */
	      if (alignment_support_scheme == dr_explicit_realign_optimized
		  || alignment_support_scheme == dr_explicit_realign)
		{
		  lsq = gimple_assign_lhs (new_stmt);
		  if (!realignment_token)
		    realignment_token = dataref_ptr;
		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
						  msq, lsq, realignment_token);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  new_stmt_info
		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);

		  if (alignment_support_scheme == dr_explicit_realign_optimized)
		    {
		      gcc_assert (phi);
		      if (i == vec_num - 1 && j == ncopies - 1)
			add_phi_arg (phi, lsq,
				     loop_latch_edge (containing_loop),
				     UNKNOWN_LOCATION);
		      msq = lsq;
		    }
		}

	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  new_temp = permute_vec_elements (new_temp, new_temp,
						   perm_mask, stmt_info, gsi);
		  new_stmt_info = vinfo->lookup_def (new_temp);
		}

	      /* Collect vector loads and later create their permutation in
		 vect_transform_grouped_load ().  */
	      if (grouped_load || slp_perm)
		dr_chain.quick_push (new_temp);

	      /* Store vector loads in the corresponding SLP_NODE.  */
	      if (slp && !slp_perm)
		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);

	      /* With an SLP permutation we load the gaps as well; without
		 one we need to skip the gaps after we manage to fully load
		 all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
	      group_elt += nunits;
	      if (maybe_ne (group_gap_adj, 0U)
		  && !slp_perm
		  && known_eq (group_elt, group_size - group_gap_adj))
		{
		  poly_wide_int bump_val
		    = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
		       * group_gap_adj);
		  tree bump = wide_int_to_tree (sizetype, bump_val);
		  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
						 stmt_info, bump);
		  group_elt = 0;
		}
	    }
	  /* Bump the vector pointer to account for a gap or for excess
	     elements loaded for a permuted SLP load.  */
	  if (maybe_ne (group_gap_adj, 0U) && slp_perm)
	    {
	      poly_wide_int bump_val
		= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
		   * group_gap_adj);
	      tree bump = wide_int_to_tree (sizetype, bump_val);
	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					     stmt_info, bump);
	    }
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  unsigned n_perms;
	  if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
					     slp_node_instance, false,
					     &n_perms))
	    {
	      dr_chain.release ();
	      return false;
	    }
	}
      else
	{
	  if (grouped_load)
	    {
	      if (memory_access_type != VMAT_LOAD_STORE_LANES)
		vect_transform_grouped_load (stmt_info, dr_chain,
					     group_size, gsi);
	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	      prev_stmt_info = new_stmt_info;
	    }
	}
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo,
		     tree *comp_vectype, enum vect_def_type *dts,
		     tree vectype)
{
  tree lhs, rhs;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  /* Mask case.  */
  if (TREE_CODE (cond) == SSA_NAME
      && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
    {
      if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
	  || !*comp_vectype
	  || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
	return false;
      return true;
    }

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
	return false;
    }
  else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
	   || TREE_CODE (lhs) == FIXED_CST)
    dts[0] = vect_constant_def;
  else
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
	return false;
    }
  else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
	   || TREE_CODE (rhs) == FIXED_CST)
    dts[1] = vect_constant_def;
  else
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  /* Invariant comparison.  */
  if (! *comp_vectype && vectype)
    {
      tree scalar_type = TREE_TYPE (lhs);
      /* If we can widen the comparison to match vectype do so.  */
      if (INTEGRAL_TYPE_P (scalar_type)
	  && tree_int_cst_lt (TYPE_SIZE (scalar_type),
			      TYPE_SIZE (TREE_TYPE (vectype))))
	scalar_type = build_nonstandard_integer_type
	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
	   TYPE_UNSIGNED (scalar_type));
      *comp_vectype = get_vectype_for_scalar_type (scalar_type);
    }
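  /* Illustrative example (not from the original sources): if both
     comparison operands are invariant 8-bit integers but VECTYPE is
     V4SI, the widening above builds a 32-bit scalar type so that
     *COMP_VECTYPE gets the same number of lanes as VECTYPE.  */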

  return true;
}

/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, REDUC_DEF is the vector
   variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX
   is 1, and in the else clause if it is 2).

   Return true if STMT_INFO is vectorizable in this way.  */

bool
vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			stmt_vec_info *vec_stmt, tree reduc_def,
			int reduc_index, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
  tree then_clause, else_clause;
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[4]
    = {vect_unknown_def_type, vect_unknown_def_type,
       vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 4;
  int ncopies;
  enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  vect_reduction_type reduction_type
    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
  if (reduction_type == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
	return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	       && reduc_def))
	return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "value used after loop.\n");
	  return false;
	}
    }

  /* Is this a vectorizable conditional operation?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
			    &comp_vectype, &dts[0], slp_node ? NULL : vectype)
      || !comp_vectype)
    return false;

  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  cond_code = TREE_CODE (cond_expr);
  if (!masked)
    {
      cond_expr0 = TREE_OPERAND (cond_expr, 0);
      cond_expr1 = TREE_OPERAND (cond_expr, 1);
    }

  if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
    {
      /* Boolean values may have another representation in vectors
	 and therefore we prefer bit operations over comparison for
	 them (which also works for scalar masks).  We store opcodes
	 to use in bitop1 and bitop2.  Statement is vectorized as
	 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
	 depending on bitop1 and bitop2 arity.  */
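      /* Illustrative mapping (not from the original sources): for 0/1
	 mask values, a > b becomes a & ~b, a >= b becomes a | ~b,
	 a != b becomes a ^ b and a == b becomes ~(a ^ b); LT and LE
	 reuse the GT and GE forms with the operands swapped.  */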
      switch (cond_code)
	{
	case GT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  break;
	case GE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  break;
	case LT_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_AND_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case LE_EXPR:
	  bitop1 = BIT_NOT_EXPR;
	  bitop2 = BIT_IOR_EXPR;
	  std::swap (cond_expr0, cond_expr1);
	  break;
	case NE_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  break;
	case EQ_EXPR:
	  bitop1 = BIT_XOR_EXPR;
	  bitop2 = BIT_NOT_EXPR;
	  break;
	default:
	  return false;
	}
      cond_code = SSA_NAME;
    }

  if (!vec_stmt)
    {
      if (bitop1 != NOP_EXPR)
	{
	  machine_mode mode = TYPE_MODE (comp_vectype);
	  optab optab;

	  optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
	  if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
	    return false;

	  if (bitop2 != NOP_EXPR)
	    {
	      optab = optab_for_tree_code (bitop2, comp_vectype,
					   optab_default);
	      if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
		return false;
	    }
	}
      if (expand_vec_cond_expr_p (vectype, comp_vectype,
				  cond_code))
	{
	  STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
	  vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
				  cost_vec);
	  return true;
	}
      return false;
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  if (reduction_type != EXTRACT_LAST_REDUCTION)
    vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      stmt_vec_info new_stmt_info = NULL;
      if (j == 0)
	{
	  if (slp_node)
	    {
	      auto_vec<tree, 4> ops;
	      auto_vec<vec<tree>, 4> vec_defs;

	      if (masked)
		ops.safe_push (cond_expr);
	      else
		{
		  ops.safe_push (cond_expr0);
		  ops.safe_push (cond_expr1);
		}
	      ops.safe_push (then_clause);
	      ops.safe_push (else_clause);
	      vect_get_slp_defs (ops, slp_node, &vec_defs);
	      vec_oprnds3 = vec_defs.pop ();
	      vec_oprnds2 = vec_defs.pop ();
	      if (!masked)
		vec_oprnds1 = vec_defs.pop ();
	      vec_oprnds0 = vec_defs.pop ();
	    }
	  else
	    {
	      if (masked)
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr, stmt_info,
						    comp_vectype);
		  vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
		}
	      else
		{
		  vec_cond_lhs
		    = vect_get_vec_def_for_operand (cond_expr0,
						    stmt_info, comp_vectype);
		  vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);

		  vec_cond_rhs
		    = vect_get_vec_def_for_operand (cond_expr1,
						    stmt_info, comp_vectype);
		  vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
		}
	      if (reduc_index == 1)
		vec_then_clause = reduc_def;
	      else
		{
		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
								  stmt_info);
		  vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
		}
	      if (reduc_index == 2)
		vec_else_clause = reduc_def;
	      else
		{
		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
								  stmt_info);
		  vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
		}
	    }
	}
      else
	{
	  vec_cond_lhs
	    = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
	  if (!masked)
	    vec_cond_rhs
	      = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());

	  vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
							    vec_oprnds2.pop ());
	  vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
							    vec_oprnds3.pop ());
	}

      if (!slp_node)
	{
	  vec_oprnds0.quick_push (vec_cond_lhs);
	  if (!masked)
	    vec_oprnds1.quick_push (vec_cond_rhs);
	  vec_oprnds2.quick_push (vec_then_clause);
	  vec_oprnds3.quick_push (vec_else_clause);
	}

8964 }
8965
9dc3f7de 8966 /* Arguments are ready. Create the new vector stmt. */
9771b263 8967 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8968 {
9771b263
DN
8969 vec_then_clause = vec_oprnds2[i];
8970 vec_else_clause = vec_oprnds3[i];
a855b1b1 8971
a414c77f
IE
8972 if (masked)
8973 vec_compare = vec_cond_lhs;
8974 else
8975 {
8976 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8977 if (bitop1 == NOP_EXPR)
8978 vec_compare = build2 (cond_code, vec_cmp_type,
8979 vec_cond_lhs, vec_cond_rhs);
8980 else
8981 {
8982 new_temp = make_ssa_name (vec_cmp_type);
e1bd7296 8983 gassign *new_stmt;
01216d27
JJ
8984 if (bitop1 == BIT_NOT_EXPR)
8985 new_stmt = gimple_build_assign (new_temp, bitop1,
8986 vec_cond_rhs);
8987 else
8988 new_stmt
8989 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8990 vec_cond_rhs);
86a91c0a 8991 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
01216d27
JJ
8992 if (bitop2 == NOP_EXPR)
8993 vec_compare = new_temp;
8994 else if (bitop2 == BIT_NOT_EXPR)
8995 {
8996 /* Instead of doing ~x ? y : z do x ? z : y. */
8997 vec_compare = new_temp;
8998 std::swap (vec_then_clause, vec_else_clause);
8999 }
9000 else
9001 {
9002 vec_compare = make_ssa_name (vec_cmp_type);
9003 new_stmt
9004 = gimple_build_assign (vec_compare, bitop2,
9005 vec_cond_lhs, new_temp);
86a91c0a 9006 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
01216d27
JJ
9007 }
9008 }
a414c77f 9009 }
bb6c2b68
RS
9010 if (reduction_type == EXTRACT_LAST_REDUCTION)
9011 {
9012 if (!is_gimple_val (vec_compare))
9013 {
9014 tree vec_compare_name = make_ssa_name (vec_cmp_type);
e1bd7296
RS
9015 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9016 vec_compare);
86a91c0a 9017 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
bb6c2b68
RS
9018 vec_compare = vec_compare_name;
9019 }
9020 gcc_assert (reduc_index == 2);
e1bd7296 9021 gcall *new_stmt = gimple_build_call_internal
bb6c2b68
RS
9022 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9023 vec_then_clause);
9024 gimple_call_set_lhs (new_stmt, scalar_dest);
9025 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
86a91c0a
RS
9026 if (stmt_info->stmt == gsi_stmt (*gsi))
9027 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
bb6c2b68
RS
9028 else
9029 {
9030 /* In this case we're moving the definition to later in the
9031 block. That doesn't matter because the only uses of the
9032 lhs are in phi statements. */
86a91c0a
RS
9033 gimple_stmt_iterator old_gsi
9034 = gsi_for_stmt (stmt_info->stmt);
bb6c2b68 9035 gsi_remove (&old_gsi, true);
e1bd7296 9036 new_stmt_info
86a91c0a 9037 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
bb6c2b68
RS
9038 }
9039 }
9040 else
9041 {
9042 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
9043 gassign *new_stmt
9044 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9045 vec_then_clause, vec_else_clause);
9046 new_stmt_info
86a91c0a 9047 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
bb6c2b68 9048 }
f7e531cf 9049 if (slp_node)
e1bd7296 9050 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
f7e531cf
IR
9051 }
9052
9053 if (slp_node)
9054 continue;
9055
e1bd7296
RS
9056 if (j == 0)
9057 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9058 else
9059 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
f7e531cf 9060
e1bd7296 9061 prev_stmt_info = new_stmt_info;
a855b1b1 9062 }
b8698a0f 9063
9771b263
DN
9064 vec_oprnds0.release ();
9065 vec_oprnds1.release ();
9066 vec_oprnds2.release ();
9067 vec_oprnds3.release ();
f7e531cf 9068
ebfd146a
IR
9069 return true;
9070}
9071
42fd8198
IE
9072/* vectorizable_comparison.
9073
32e8e429
RS
9074 Check if STMT_INFO is comparison expression that can be vectorized.
9075 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
42fd8198
IE
9076 comparison, put it in VEC_STMT, and insert it at GSI.
9077
32e8e429 9078 Return true if STMT_INFO is vectorizable in this way. */
42fd8198 9079
fce57248 9080static bool
32e8e429 9081vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
1eede195 9082 stmt_vec_info *vec_stmt, tree reduc_def,
68435eb2 9083 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198 9084{
e4057a39 9085 vec_info *vinfo = stmt_info->vinfo;
42fd8198 9086 tree lhs, rhs1, rhs2;
42fd8198
IE
9087 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9088 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9089 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9090 tree new_temp;
9091 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9092 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9093 int ndts = 2;
928686b1 9094 poly_uint64 nunits;
42fd8198 9095 int ncopies;
49e76ff1 9096 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9097 stmt_vec_info prev_stmt_info = NULL;
9098 int i, j;
9099 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9100 vec<tree> vec_oprnds0 = vNULL;
9101 vec<tree> vec_oprnds1 = vNULL;
42fd8198
IE
9102 tree mask_type;
9103 tree mask;
9104
c245362b
IE
9105 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9106 return false;
9107
30480bcd 9108 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9109 return false;
9110
9111 mask_type = vectype;
9112 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9113
fce57248 9114 if (slp_node)
42fd8198
IE
9115 ncopies = 1;
9116 else
e8f142e2 9117 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9118
9119 gcc_assert (ncopies >= 1);
42fd8198
IE
9120 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9121 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9122 && reduc_def))
9123 return false;
9124
9125 if (STMT_VINFO_LIVE_P (stmt_info))
9126 {
9127 if (dump_enabled_p ())
9128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9129 "value used after loop.\n");
9130 return false;
9131 }

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		   TYPE_VECTOR_SUBPARTS (vectype2)))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
	return false;
    }
  else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
    return false;

  /* Can't compare mask and non-mask types.  */
  if (vectype1 && vectype2
      && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
    return false;

  /* Boolean values may have another representation in vectors
     and therefore we prefer bit operations over comparison for
     them (which also works for scalar masks).  We store opcodes
     to use in bitop1 and bitop2.  Statement is vectorized as
       BITOP2 (rhs1 BITOP1 rhs2) or
       rhs1 BITOP2 (BITOP1 rhs2)
     depending on bitop1 and bitop2 arity.  */
9180 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9181 {
9182 if (code == GT_EXPR)
9183 {
9184 bitop1 = BIT_NOT_EXPR;
9185 bitop2 = BIT_AND_EXPR;
9186 }
9187 else if (code == GE_EXPR)
9188 {
9189 bitop1 = BIT_NOT_EXPR;
9190 bitop2 = BIT_IOR_EXPR;
9191 }
9192 else if (code == LT_EXPR)
9193 {
9194 bitop1 = BIT_NOT_EXPR;
9195 bitop2 = BIT_AND_EXPR;
9196 std::swap (rhs1, rhs2);
264d951a 9197 std::swap (dts[0], dts[1]);
49e76ff1
IE
9198 }
9199 else if (code == LE_EXPR)
9200 {
9201 bitop1 = BIT_NOT_EXPR;
9202 bitop2 = BIT_IOR_EXPR;
9203 std::swap (rhs1, rhs2);
264d951a 9204 std::swap (dts[0], dts[1]);
49e76ff1
IE
9205 }
9206 else
9207 {
9208 bitop1 = BIT_XOR_EXPR;
9209 if (code == EQ_EXPR)
9210 bitop2 = BIT_NOT_EXPR;
9211 }
9212 }
9213
42fd8198
IE
9214 if (!vec_stmt)
9215 {
49e76ff1 9216 if (bitop1 == NOP_EXPR)
68435eb2
RB
9217 {
9218 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9219 return false;
9220 }
49e76ff1
IE
9221 else
9222 {
9223 machine_mode mode = TYPE_MODE (vectype);
9224 optab optab;
9225
9226 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9227 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9228 return false;
9229
9230 if (bitop2 != NOP_EXPR)
9231 {
9232 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9233 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9234 return false;
9235 }
49e76ff1 9236 }
68435eb2
RB
9237
9238 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9239 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9240 dts, ndts, slp_node, cost_vec);
9241 return true;
42fd8198
IE
9242 }
9243
9244 /* Transform. */
9245 if (!slp_node)
9246 {
9247 vec_oprnds0.create (1);
9248 vec_oprnds1.create (1);
9249 }
9250
9251 /* Handle def. */
9252 lhs = gimple_assign_lhs (stmt);
9253 mask = vect_create_destination_var (lhs, mask_type);
9254
9255 /* Handle cmp expr. */
9256 for (j = 0; j < ncopies; j++)
9257 {
e1bd7296 9258 stmt_vec_info new_stmt_info = NULL;
42fd8198
IE
9259 if (j == 0)
9260 {
9261 if (slp_node)
9262 {
9263 auto_vec<tree, 2> ops;
9264 auto_vec<vec<tree>, 2> vec_defs;
9265
9266 ops.safe_push (rhs1);
9267 ops.safe_push (rhs2);
306b0c92 9268 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9269 vec_oprnds1 = vec_defs.pop ();
9270 vec_oprnds0 = vec_defs.pop ();
9271 }
9272 else
9273 {
86a91c0a
RS
9274 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9275 vectype);
9276 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9277 vectype);
42fd8198
IE
9278 }
9279 }
9280 else
9281 {
e4057a39 9282 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
42fd8198 9283 vec_oprnds0.pop ());
e4057a39 9284 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
42fd8198
IE
9285 vec_oprnds1.pop ());
9286 }
9287
9288 if (!slp_node)
9289 {
9290 vec_oprnds0.quick_push (vec_rhs1);
9291 vec_oprnds1.quick_push (vec_rhs2);
9292 }
9293
9294 /* Arguments are ready. Create the new vector stmt. */
9295 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9296 {
9297 vec_rhs2 = vec_oprnds1[i];
9298
9299 new_temp = make_ssa_name (mask);
49e76ff1
IE
9300 if (bitop1 == NOP_EXPR)
9301 {
e1bd7296
RS
9302 gassign *new_stmt = gimple_build_assign (new_temp, code,
9303 vec_rhs1, vec_rhs2);
9304 new_stmt_info
86a91c0a 9305 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
49e76ff1
IE
9306 }
9307 else
9308 {
e1bd7296 9309 gassign *new_stmt;
49e76ff1
IE
9310 if (bitop1 == BIT_NOT_EXPR)
9311 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9312 else
9313 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9314 vec_rhs2);
e1bd7296 9315 new_stmt_info
86a91c0a 9316 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
49e76ff1
IE
9317 if (bitop2 != NOP_EXPR)
9318 {
9319 tree res = make_ssa_name (mask);
9320 if (bitop2 == BIT_NOT_EXPR)
9321 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9322 else
9323 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9324 new_temp);
e1bd7296 9325 new_stmt_info
86a91c0a 9326 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
49e76ff1
IE
9327 }
9328 }
42fd8198 9329 if (slp_node)
e1bd7296 9330 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
42fd8198
IE
9331 }
9332
9333 if (slp_node)
9334 continue;
9335
9336 if (j == 0)
e1bd7296 9337 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
42fd8198 9338 else
e1bd7296 9339 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
42fd8198 9340
e1bd7296 9341 prev_stmt_info = new_stmt_info;
42fd8198
IE
9342 }
9343
9344 vec_oprnds0.release ();
9345 vec_oprnds1.release ();
9346
9347 return true;
9348}
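
/* Illustration (not an excerpt from GCC): the bitop lowering chosen
   above for boolean vectors relies on the identities checked below,
   exhaustively for single-bit masks.  A minimal standalone sketch;
   the test function is hypothetical scaffolding, not vectorizer code.  */

#include <assert.h>

static void
comparison_bitop_identities (void)
{
  for (int a = 0; a <= 1; a++)
    for (int b = 0; b <= 1; b++)
      {
	/* GT_EXPR: a > b == a & ~b (bitop1 = BIT_NOT_EXPR on rhs2,
	   bitop2 = BIT_AND_EXPR).  */
	assert ((a > b) == (a & !b));
	/* GE_EXPR: a >= b == a | ~b (bitop2 = BIT_IOR_EXPR).  */
	assert ((a >= b) == (a | !b));
	/* LT_EXPR: operands are swapped first, giving b & ~a.  */
	assert ((a < b) == (b & !a));
	/* LE_EXPR: operands are swapped first, giving b | ~a.  */
	assert ((a <= b) == (b | !a));
	/* NE_EXPR: a != b == a ^ b (bitop1 = BIT_XOR_EXPR only).  */
	assert ((a != b) == (a ^ b));
	/* EQ_EXPR: a == b == ~(a ^ b) (bitop2 = BIT_NOT_EXPR).  */
	assert ((a == b) == !(a ^ b));
      }
}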

/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
   can handle all live statements in the node.  Otherwise return true
   if STMT_INFO is not live or if vectorizable_live_operation can handle it.
   GSI and VEC_STMT are as for vectorizable_live_operation.  */

static bool
can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			  slp_tree slp_node, stmt_vec_info *vec_stmt,
			  stmt_vector_for_cost *cost_vec)
{
  if (slp_node)
    {
      stmt_vec_info slp_stmt_info;
      unsigned int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
	{
	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
					       vec_stmt, cost_vec))
	    return false;
	}
    }
  else if (STMT_VINFO_LIVE_P (stmt_info)
	   && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
					    vec_stmt, cost_vec))
    return false;

  return true;
}

/* Make sure the statement is vectorizable.  */

opt_result
vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
		   slp_tree node, slp_instance node_instance,
		   stmt_vector_for_cost *cost_vec)
{
  vec_info *vinfo = stmt_info->vinfo;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
		     stmt_info->stmt);

  if (gimple_has_volatile_ops (stmt_info->stmt))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " stmt has volatile operands: %G\n",
				   stmt_info->stmt);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info pattern_def_stmt_info
	    = vinfo->lookup_stmt (gsi_stmt (si));
	  if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
	    {
	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "==> examining pattern def statement: %G",
				 pattern_def_stmt_info->stmt);

	      opt_result res
		= vect_analyze_stmt (pattern_def_stmt_info,
				     need_to_vectorize, node, node_instance,
				     cost_vec);
	      if (!res)
		return res;
	    }
	}
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance; therefore all the statements are relevant.

     A pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; we don't analyze pattern stmts there, since the
     pattern stmts are already part of the SLP instance.  */

  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && pattern_stmt_info
	  && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	      || STMT_VINFO_LIVE_P (pattern_stmt_info)))
	{
	  /* Analyze PATTERN_STMT instead of the original stmt.  */
	  stmt_info = pattern_stmt_info;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "==> examining pattern statement: %G",
			     stmt_info->stmt);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

	  return opt_result::success ();
	}
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	   && node == NULL
	   && pattern_stmt_info
	   && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
	       || STMT_VINFO_LIVE_P (pattern_stmt_info)))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "==> examining pattern statement: %G",
			 pattern_stmt_info->stmt);

      opt_result res
	= vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
			     node_instance, cost_vec);
      if (!res)
	return res;
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
		  && (relevance == vect_used_in_outer
		      || relevance == vect_used_in_outer_by_reduction
		      || relevance == vect_used_by_reduction
		      || relevance == vect_unused_in_scope
		      || relevance == vect_used_only_live));
      break;

    case vect_induction_def:
      gcc_assert (!bb_vinfo);
      break;

    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      tree type = gimple_expr_type (stmt_info->stmt);
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
		  || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "handled only by SLP analysis\n");
      return opt_result::success ();
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
    ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
					   cost_vec)
	  || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
				cost_vec)
	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_reduction (stmt_info, NULL, NULL, node,
				     node_instance, cost_vec)
	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
	  || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
				     cost_vec)
	  || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
				      cost_vec));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
	      || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
					       cost_vec)
	      || vectorizable_conversion (stmt_info, NULL, NULL, node,
					  cost_vec)
	      || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
	      || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
	      || vectorizable_assignment (stmt_info, NULL, NULL, node,
					  cost_vec)
	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
				    cost_vec)
	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
	      || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
					 cost_vec)
	      || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
					  cost_vec));
    }

  if (!ok)
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " relevant stmt not supported: %G",
				   stmt_info->stmt);

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (!bb_vinfo
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
    return opt_result::failure_at (stmt_info->stmt,
				   "not vectorized:"
				   " live stmt not supported: %G",
				   stmt_info->stmt);

  return opt_result::success ();
}


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */

bool
vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		     slp_tree slp_node, slp_instance slp_node_instance)
{
  vec_info *vinfo = stmt_info->vinfo;
  bool is_store = false;
  stmt_vec_info vec_stmt = NULL;
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);

  bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
		   && nested_in_vect_loop_p
			(LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
			 stmt_info));

  gimple *stmt = stmt_info->stmt;
  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
				      NULL);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
				     NULL);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
				     NULL);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
				      NULL);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
				slp_node_instance, NULL);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
	  if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, NULL, 0,
				     slp_node, NULL);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt_info, gsi, &vec_stmt, NULL,
				      slp_node, NULL);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
      stmt = gsi_stmt (*gsi);
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
					   slp_node, NULL);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
				     slp_node_instance, NULL);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "stmt not supported.\n");
	  gcc_unreachable ();
	}
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
		&& STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && nested_p
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info)
	     == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
	STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_get_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	  {
	    stmt_vec_info exit_phi_info
	      = vinfo->lookup_stmt (USE_STMT (use_p));
	    STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
	  }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
				       NULL);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (stmt_vec_info first_stmt_info)
{
  vec_info *vinfo = first_stmt_info->vinfo;
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}

/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
    return NULL_TREE;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
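
/* Illustration (not an excerpt from GCC): the SIZE/NBYTES arithmetic in
   get_vectype_for_scalar_type_and_size above.  A hypothetical standalone
   sketch mirroring the multiple_p check: a vector of a given byte SIZE
   exists only if SIZE is an exact multiple of the element size, and the
   quotient is the number of lanes.  */

#include <assert.h>

static unsigned
example_nunits (unsigned size, unsigned nbytes)
{
  /* Mirrors "!multiple_p (size, nbytes, &nunits)": return 0 (no vector
     type) unless SIZE is an exact multiple of NBYTES.  */
  return size % nbytes == 0 ? size / nbytes : 0;
}

static void
example_nunits_tests (void)
{
  assert (example_nunits (16, 1) == 16);  /* 16-byte vector of chars, e.g. V16QI.  */
  assert (example_nunits (16, 4) == 4);   /* 16-byte vector of ints, e.g. V4SI.  */
  assert (example_nunits (16, 8) == 2);   /* 16-byte vector of doubles, e.g. V2DF.  */
}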

poly_uint64 current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && known_eq (current_vector_size, 0U))
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of the specified SCALAR_TYPE as supported by the target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of the same size as
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.

   Output:
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	    case GIMPLE_CALL:
	      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	      break;
	    default:
	      *dt = vect_unknown_def_type;
	      break;
	    }
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt_info)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt_info, vectype_out,
					     vectype_in, code1, code2,
					     multi_step_cvt, interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT_INFO is only directly used in the
	     reduction statement.  */
	  tree lhs = gimple_assign_lhs (stmt_info->stmt);
	  stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
	  if (use_stmt_info
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
      c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check on the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
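
/* Illustration (not an excerpt from GCC): the lo/hi versus even/odd
   result orderings discussed for WIDEN_MULT_EXPR above, modeled with
   plain arrays.  A hypothetical standalone sketch; the vec_widen_*
   helpers are toy stand-ins for the target instructions they mimic.  */

#include <assert.h>

/* Lo/hi scheme: widened products appear in source order,
   vect1 = out[0..3], vect2 = out[4..7].  */

static void
vec_widen_mult_lo_hi (const short *a, const short *b, int *out)
{
  for (int i = 0; i < 8; i++)
    out[i] = (int) a[i] * b[i];
}

/* Even/odd scheme (e.g. Altivec mult_even/mult_odd): vect1 holds the
   even lanes, vect2 the odd lanes, so the lane order differs.  */

static void
vec_widen_mult_even_odd (const short *a, const short *b, int *out)
{
  for (int i = 0; i < 4; i++)
    out[i] = (int) a[2 * i] * b[2 * i];
  for (int i = 0; i < 4; i++)
    out[4 + i] = (int) a[2 * i + 1] * b[2 * i + 1];
}

static void
widen_mult_ordering_example (void)
{
  short a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  short b[8] = {8, 7, 6, 5, 4, 3, 2, 1};
  int lohi[8], evenodd[8];
  vec_widen_mult_lo_hi (a, b, lohi);
  vec_widen_mult_even_odd (a, b, evenodd);
  /* The lane order differs between the two schemes...  */
  assert (lohi[1] != evenodd[1]);
  /* ...but a reduction such as s += a[i] * b[i] cannot tell them
     apart, which is why the even/odd form is acceptable there.  */
  int s1 = 0, s2 = 0;
  for (int i = 0; i < 8; i++)
    {
      s1 += lohi[i];
      s2 += evenodd[i];
    }
  assert (s1 == s2);
}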

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      break;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check on the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
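
/* Illustration (not an excerpt from GCC): a multi-step narrowing
   int -> short -> char as described above (MULTI_STEP_CVT == 1 with
   intermediate type short).  A hypothetical standalone sketch; the
   vec_pack_trunc_* helpers model VEC_PACK_TRUNC_EXPR, which
   concatenates two vectors while truncating each element.  */

#include <assert.h>

static void
vec_pack_trunc_int_to_short (const int *a, const int *b, short *out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (short) a[i];
  for (int i = 0; i < n; i++)
    out[n + i] = (short) b[i];
}

static void
vec_pack_trunc_short_to_char (const short *a, const short *b,
			      signed char *out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (signed char) a[i];
  for (int i = 0; i < n; i++)
    out[n + i] = (signed char) b[i];
}

static void
narrowing_two_step_example (void)
{
  /* Four 4-lane int vectors become one 16-lane char vector: step 1
     packs the ints into the intermediate short vectors, step 2 packs
     the shorts into chars.  */
  int v0[4] = {1, 2, 3, 4}, v1[4] = {5, 6, 7, 8};
  int v2[4] = {9, 10, 11, 12}, v3[4] = {13, 14, 15, 16};
  short s0[8], s1[8];
  signed char c[16];
  vec_pack_trunc_int_to_short (v0, v1, s0, 4);
  vec_pack_trunc_int_to_short (v2, v3, s1, 4);
  vec_pack_trunc_short_to_char (s0, s1, c, 8);
  assert (c[0] == 1 && c[15] == 16);
}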

/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
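
/* Illustration (not an excerpt from GCC): the semantics of the
   IFN_WHILE_ULT mask built by vect_gen_while above, modeled for an
   N-lane mask.  Lane I is active iff START_INDEX + I < END_INDEX, so
   the result is an all-ones prefix covering the iterations that
   remain; vect_gen_while_not produces its complement.  A hypothetical
   standalone sketch.  */

#include <assert.h>

static void
while_ult_model (unsigned start, unsigned end, unsigned char *mask,
		 int nlanes)
{
  for (int i = 0; i < nlanes; i++)
    mask[i] = start + (unsigned) i < end;
}

static void
while_ult_example (void)
{
  unsigned char mask[4];
  /* Final partial vector of a 10-iteration loop with 4 lanes: only
     indices 8 and 9 remain, so exactly the first two lanes are set.  */
  while_ult_model (8, 10, mask, 4);
  assert (mask[0] && mask[1] && !mask[2] && !mask[3]);
}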

/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - boolean_type_node if the statement is a boolean operation whose
       vector type can only be determined once all the other vector types
       are known; and
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out)
{
  gimple *stmt = stmt_info->stmt;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and the vectorization factor
	     they really need can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G",
				     stmt);
    }

  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (STMT_VINFO_VECTYPE (stmt_info))
    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
  else
    {
      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      /* Pure bool ops don't participate in number-of-units computation.
	 For comparisons use the types being compared.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
	  && is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
	{
	  *stmt_vectype_out = boolean_type_node;

	  tree rhs1 = gimple_assign_rhs1 (stmt);
	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    scalar_type = TREE_TYPE (rhs1);
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "pure bool operation.\n");
	      return opt_result::success ();
	    }
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "get vectype for scalar type: %T\n", scalar_type);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (!*stmt_vectype_out)
	*stmt_vectype_out = vectype;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    nunits_vectype = vectype;
  else
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      if (*stmt_vectype_out != boolean_type_node)
	{
	  HOST_WIDE_INT dummy;
	  scalar_type = vect_get_smallest_scalar_type (stmt_info,
						       &dummy, &dummy);
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "get vectype for scalar type: %T\n", scalar_type);
      nunits_vectype = get_vectype_for_scalar_type (scalar_type);
    }
  if (!nunits_vectype)
    return opt_result::failure_at (stmt,
				   "not vectorized: unsupported data-type %T\n",
				   scalar_type);

  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
    return opt_result::failure_at (stmt,
				   "not vectorized: different sized vector "
				   "types in statement, %T and %T\n",
				   vectype, nunits_vectype);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
		       nunits_vectype);

      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}

/* Try to determine the correct vector type for STMT_INFO, which is a
   statement that produces a scalar boolean result.  Return the vector
   type on success, otherwise return NULL_TREE.  */

opt_tree
vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
{
  gimple *stmt = stmt_info->stmt;
  tree mask_type = NULL;
  tree vectype, scalar_type;

  if (is_gimple_assign (stmt)
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
    {
      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      mask_type = get_mask_type_for_scalar_type (scalar_type);

      if (!mask_type)
	return opt_tree::failure_at (stmt,
				     "not vectorized: unsupported mask\n");
    }
  else
    {
      tree rhs;
      ssa_op_iter iter;
      enum vect_def_type dt;

      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
	{
	  if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: can't compute mask"
					 " type for statement, %G", stmt);

	  /* No vectype probably means external definition.
	     Allow it in case there is another operand which
	     allows us to determine the mask type.  */
	  if (!vectype)
	    continue;

	  if (!mask_type)
	    mask_type = vectype;
	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
			     TYPE_VECTOR_SUBPARTS (vectype)))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: different sized mask"
					 " types in statement, %T and %T\n",
					 mask_type, vectype);
	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
		   != VECTOR_BOOLEAN_TYPE_P (vectype))
	    return opt_tree::failure_at (stmt,
					 "not vectorized: mixed mask and "
					 "nonmask vector types in statement, "
					 "%T and %T\n",
					 mask_type, vectype);
	}

      /* We may compare boolean values loaded as vectors of integers.
	 Fix mask_type in such a case.  */
      if (mask_type
	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
	  && gimple_code (stmt) == GIMPLE_ASSIGN
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
	mask_type = build_same_sized_truth_vector_type (mask_type);
    }

  /* No mask_type should mean a loop invariant predicate.
     This is probably a subject for optimization in if-conversion.  */
  if (!mask_type)
    return opt_tree::failure_at (stmt,
				 "not vectorized: can't compute mask type "
				 "for statement: %G", stmt);

  return opt_tree::success (mask_type);
}