/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

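/* Illustrative use (a sketch only; the actual counts, kinds and
   misalignments come from the callers below, and the variable names here
   are just for illustration): costing two unaligned loads of a non-gather
   STMT_INFO pushes a single { 2, unaligned_load, vect_body, stmt_info,
   misalign } entry onto BODY_COST_VEC and returns
   2 * builtin_vectorization_cost (unaligned_load, vectype, misalign)
   as the preliminary estimate:

     unsigned est = record_stmt_cost (body_cost_vec, 2, unaligned_load,
				      stmt_info, misalign, vect_body);  */
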
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

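/* For example, create_vector_array (vectype, 2) creates a temporary named
   after "vect_array" whose type is vectype[2]; its elements are then
   accessed through read_vector_array and write_vector_array below.  */
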
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

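/* The statement emitted above marks VAR as dead; in the gimple dumps it
   shows up as e.g.

     vect_array ={v} {CLOBBER};  */
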
190/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
191
192/* Function vect_mark_relevant.
193
194 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
195
196static void
355fe088 197vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
97ecdb46 198 enum vect_relevant relevant, bool live_p)
ebfd146a
IR
199{
200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
203
73fbfcad 204 if (dump_enabled_p ())
66c16fd9
RB
205 {
206 dump_printf_loc (MSG_NOTE, vect_location,
207 "mark relevant %d, live %d: ", relevant, live_p);
208 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
209 }
ebfd146a 210
83197f37
IR
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
ebfd146a
IR
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
216 {
97ecdb46
JJ
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
83197f37 221
97ecdb46
JJ
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_NOTE, vect_location,
224 "last stmt in pattern. don't mark"
225 " relevant/live.\n");
10681ce8
RS
226 stmt_vec_info old_stmt_info = stmt_info;
227 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
228 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
97ecdb46
JJ
229 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
230 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
10681ce8 231 stmt = stmt_info->stmt;
ebfd146a
IR
232 }
233
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
237
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240 {
73fbfcad 241 if (dump_enabled_p ())
78c60e3d 242 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 243 "already marked relevant/live.\n");
ebfd146a
IR
244 return;
245 }
246
9771b263 247 worklist->safe_push (stmt);
ebfd146a
IR
248}
249
250
b28ead45
AH
251/* Function is_simple_and_all_uses_invariant
252
253 Return true if STMT is simple and all uses of it are invariant. */
254
255bool
256is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
257{
258 tree op;
b28ead45
AH
259 ssa_op_iter iter;
260
261 if (!is_gimple_assign (stmt))
262 return false;
263
264 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
265 {
266 enum vect_def_type dt = vect_uninitialized_def;
267
894dd753 268 if (!vect_is_simple_use (op, loop_vinfo, &dt))
b28ead45
AH
269 {
270 if (dump_enabled_p ())
271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
272 "use not simple.\n");
273 return false;
274 }
275
276 if (dt != vect_external_def && dt != vect_constant_def)
277 return false;
278 }
279 return true;
280}
281
ebfd146a
IR
282/* Function vect_stmt_relevant_p.
283
284 Return true if STMT in loop that is represented by LOOP_VINFO is
285 "relevant for vectorization".
286
287 A stmt is considered "relevant for vectorization" if:
288 - it has uses outside the loop.
289 - it has vdefs (it alters memory).
290 - control stmts in the loop (except for the exit condition).
291
292 CHECKME: what other side effects would the vectorizer allow? */
293
294static bool
355fe088 295vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
ebfd146a
IR
296 enum vect_relevant *relevant, bool *live_p)
297{
298 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
299 ssa_op_iter op_iter;
300 imm_use_iterator imm_iter;
301 use_operand_p use_p;
302 def_operand_p def_p;
303
8644a673 304 *relevant = vect_unused_in_scope;
ebfd146a
IR
305 *live_p = false;
306
307 /* cond stmt other than loop exit cond. */
b8698a0f
L
308 if (is_ctrl_stmt (stmt)
309 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
310 != loop_exit_ctrl_vec_info_type)
8644a673 311 *relevant = vect_used_in_scope;
ebfd146a
IR
312
313 /* changing memory. */
314 if (gimple_code (stmt) != GIMPLE_PHI)
ac6aeab4
RB
315 if (gimple_vdef (stmt)
316 && !gimple_clobber_p (stmt))
ebfd146a 317 {
73fbfcad 318 if (dump_enabled_p ())
78c60e3d 319 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 320 "vec_stmt_relevant_p: stmt has vdefs.\n");
8644a673 321 *relevant = vect_used_in_scope;
ebfd146a
IR
322 }
323
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
326 {
327 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
328 {
329 basic_block bb = gimple_bb (USE_STMT (use_p));
330 if (!flow_bb_inside_loop_p (loop, bb))
331 {
73fbfcad 332 if (dump_enabled_p ())
78c60e3d 333 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 334 "vec_stmt_relevant_p: used out of loop.\n");
ebfd146a 335
3157b0c2
AO
336 if (is_gimple_debug (USE_STMT (use_p)))
337 continue;
338
ebfd146a
IR
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop closed form) */
341 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 gcc_assert (bb == single_exit (loop)->dest);
343
344 *live_p = true;
345 }
346 }
347 }
348
3a2edf4c
AH
349 if (*live_p && *relevant == vect_unused_in_scope
350 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
b28ead45
AH
351 {
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE, vect_location,
354 "vec_stmt_relevant_p: stmt live but not relevant.\n");
355 *relevant = vect_used_only_live;
356 }
357
ebfd146a
IR
358 return (*live_p || *relevant);
359}
360
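/* For instance, a store such as "a[i_1] = _3" has a vdef and so gets
   *relevant = vect_used_in_scope, while a stmt whose only def is consumed
   by a loop-exit phi gets *live_p = true; a stmt with neither property is
   left unused/not-live and is only marked later if something relevant
   turns out to depend on it.  */
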
361
b8698a0f 362/* Function exist_non_indexing_operands_for_use_p
ebfd146a 363
ff802fa1 364 USE is one of the uses attached to STMT. Check if USE is
ebfd146a
IR
365 used in STMT for anything other than indexing an array. */
366
367static bool
355fe088 368exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
ebfd146a
IR
369{
370 tree operand;
371 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
59a05b0c 372
ff802fa1 373 /* USE corresponds to some operand in STMT. If there is no data
ebfd146a
IR
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
59a05b0c 378
ebfd146a
IR
379 /* STMT has a data_ref. FORNOW this means that it is of one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
384
385 'var' in the second case corresponds to a def, not a use,
b8698a0f 386 so USE cannot correspond to any operands that are not used
ebfd146a
IR
387 for array indexing.
388
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
ebfd146a
IR
391
392 if (!gimple_assign_copy_p (stmt))
5ce9450f
JJ
393 {
394 if (is_gimple_call (stmt)
395 && gimple_call_internal_p (stmt))
bfaa08b7
RS
396 {
397 internal_fn ifn = gimple_call_internal_fn (stmt);
398 int mask_index = internal_fn_mask_index (ifn);
399 if (mask_index >= 0
400 && use == gimple_call_arg (stmt, mask_index))
401 return true;
f307441a
RS
402 int stored_value_index = internal_fn_stored_value_index (ifn);
403 if (stored_value_index >= 0
404 && use == gimple_call_arg (stmt, stored_value_index))
405 return true;
bfaa08b7
RS
406 if (internal_gather_scatter_fn_p (ifn)
407 && use == gimple_call_arg (stmt, 1))
408 return true;
bfaa08b7 409 }
5ce9450f
JJ
410 return false;
411 }
412
59a05b0c
EB
413 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
414 return false;
ebfd146a 415 operand = gimple_assign_rhs1 (stmt);
ebfd146a
IR
416 if (TREE_CODE (operand) != SSA_NAME)
417 return false;
418
419 if (operand == use)
420 return true;
421
422 return false;
423}
424
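/* For example, given the store "a[i_5] = x_4", the stored value x_4 is a
   non-indexing use (the function returns true for it), whereas i_5 is only
   used to index the array, so the function returns false for i_5.  */
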
425
b8698a0f 426/*
ebfd146a
IR
427 Function process_use.
428
429 Inputs:
430 - a USE in STMT in a loop represented by LOOP_VINFO
b28ead45 431 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
ff802fa1 432 that defined USE. This is done by calling mark_relevant and passing it
ebfd146a 433 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
aec7ae7d
JJ
434 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
435 be performed.
ebfd146a
IR
436
437 Outputs:
438 Generally, LIVE_P and RELEVANT are used to define the liveness and
439 relevance info of the DEF_STMT of this USE:
440 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
441 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
442 Exceptions:
443 - case 1: If USE is used only for address computations (e.g. array indexing),
b8698a0f 444 which does not need to be directly vectorized, then the liveness/relevance
ebfd146a 445 of the respective DEF_STMT is left unchanged.
b8698a0f
L
446 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
447 skip DEF_STMT because it has already been processed.
ebfd146a
IR
448 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
449 be modified accordingly.
450
451 Return true if everything is as expected. Return false otherwise. */
452
453static bool
b28ead45 454process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
355fe088 455 enum vect_relevant relevant, vec<gimple *> *worklist,
aec7ae7d 456 bool force)
ebfd146a 457{
ebfd146a
IR
458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
459 stmt_vec_info dstmt_vinfo;
460 basic_block bb, def_bb;
ebfd146a
IR
461 enum vect_def_type dt;
462
b8698a0f 463 /* case 1: we are only interested in uses that need to be vectorized. Uses
ebfd146a 464 that are used for address computation are not considered relevant. */
aec7ae7d 465 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
ebfd146a
IR
466 return true;
467
fef96d8e 468 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
b8698a0f 469 {
73fbfcad 470 if (dump_enabled_p ())
78c60e3d 471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 472 "not vectorized: unsupported use in stmt.\n");
ebfd146a
IR
473 return false;
474 }
475
fef96d8e 476 if (!dstmt_vinfo)
ebfd146a
IR
477 return true;
478
fef96d8e 479 def_bb = gimple_bb (dstmt_vinfo->stmt);
ebfd146a 480
fef96d8e
RS
481 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
482 DSTMT_VINFO must have already been processed, because this should be the
b8698a0f 483 only way that STMT, which is a reduction-phi, was put in the worklist,
fef96d8e 484 as there should be no other uses for DSTMT_VINFO in the loop. So we just
ebfd146a 485 check that everything is as expected, and we are done. */
ebfd146a
IR
486 bb = gimple_bb (stmt);
487 if (gimple_code (stmt) == GIMPLE_PHI
488 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
fef96d8e 489 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
ebfd146a
IR
490 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
491 && bb->loop_father == def_bb->loop_father)
492 {
73fbfcad 493 if (dump_enabled_p ())
78c60e3d 494 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 495 "reduc-stmt defining reduc-phi in the same nest.\n");
ebfd146a 496 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
b8698a0f 497 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
8644a673 498 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
ebfd146a
IR
499 return true;
500 }
501
502 /* case 3a: outer-loop stmt defining an inner-loop stmt:
503 outer-loop-header-bb:
fef96d8e 504 d = dstmt_vinfo
ebfd146a
IR
505 inner-loop:
506 stmt # use (d)
507 outer-loop-tail-bb:
508 ... */
509 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
510 {
73fbfcad 511 if (dump_enabled_p ())
78c60e3d 512 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 513 "outer-loop def-stmt defining inner-loop stmt.\n");
7c5222ff 514
ebfd146a
IR
515 switch (relevant)
516 {
8644a673 517 case vect_unused_in_scope:
7c5222ff
IR
518 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
519 vect_used_in_scope : vect_unused_in_scope;
ebfd146a 520 break;
7c5222ff 521
ebfd146a 522 case vect_used_in_outer_by_reduction:
7c5222ff 523 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
ebfd146a
IR
524 relevant = vect_used_by_reduction;
525 break;
7c5222ff 526
ebfd146a 527 case vect_used_in_outer:
7c5222ff 528 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
8644a673 529 relevant = vect_used_in_scope;
ebfd146a 530 break;
7c5222ff 531
8644a673 532 case vect_used_in_scope:
ebfd146a
IR
533 break;
534
535 default:
536 gcc_unreachable ();
b8698a0f 537 }
ebfd146a
IR
538 }
539
540 /* case 3b: inner-loop stmt defining an outer-loop stmt:
541 outer-loop-header-bb:
542 ...
543 inner-loop:
fef96d8e 544 d = dstmt_vinfo
06066f92 545 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
ebfd146a
IR
546 stmt # use (d) */
547 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
548 {
73fbfcad 549 if (dump_enabled_p ())
78c60e3d 550 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 551 "inner-loop def-stmt defining outer-loop stmt.\n");
7c5222ff 552
ebfd146a
IR
553 switch (relevant)
554 {
8644a673 555 case vect_unused_in_scope:
b8698a0f 556 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
06066f92 557 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
a70d6342 558 vect_used_in_outer_by_reduction : vect_unused_in_scope;
ebfd146a
IR
559 break;
560
ebfd146a 561 case vect_used_by_reduction:
b28ead45 562 case vect_used_only_live:
ebfd146a
IR
563 relevant = vect_used_in_outer_by_reduction;
564 break;
565
8644a673 566 case vect_used_in_scope:
ebfd146a
IR
567 relevant = vect_used_in_outer;
568 break;
569
570 default:
571 gcc_unreachable ();
572 }
573 }
643a9684
RB
574 /* We are also not interested in uses on loop PHI backedges that are
575 inductions. Otherwise we'll needlessly vectorize the IV increment
e294f495
RB
576 and cause hybrid SLP for SLP inductions. Unless the PHI is live
577 of course. */
643a9684
RB
578 else if (gimple_code (stmt) == GIMPLE_PHI
579 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
e294f495 580 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
643a9684
RB
581 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
582 == use))
583 {
584 if (dump_enabled_p ())
585 dump_printf_loc (MSG_NOTE, vect_location,
586 "induction value on backedge.\n");
587 return true;
588 }
589
ebfd146a 590
fef96d8e 591 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
ebfd146a
IR
592 return true;
593}
594
595
596/* Function vect_mark_stmts_to_be_vectorized.
597
598 Not all stmts in the loop need to be vectorized. For example:
599
600 for i...
601 for j...
602 1. T0 = i + j
603 2. T1 = a[T0]
604
605 3. j = j + 1
606
607 Stmts 1 and 3 do not need to be vectorized, because loop control and
608 addressing of vectorized data-refs are handled differently.
609
610 This pass detects such stmts. */
611
612bool
613vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
614{
ebfd146a
IR
615 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
616 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
617 unsigned int nbbs = loop->num_nodes;
618 gimple_stmt_iterator si;
355fe088 619 gimple *stmt;
ebfd146a
IR
620 unsigned int i;
621 stmt_vec_info stmt_vinfo;
622 basic_block bb;
355fe088 623 gimple *phi;
ebfd146a 624 bool live_p;
b28ead45 625 enum vect_relevant relevant;
ebfd146a 626
adac3a68 627 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
ebfd146a 628
355fe088 629 auto_vec<gimple *, 64> worklist;
ebfd146a
IR
630
631 /* 1. Init worklist. */
632 for (i = 0; i < nbbs; i++)
633 {
634 bb = bbs[i];
635 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
b8698a0f 636 {
ebfd146a 637 phi = gsi_stmt (si);
73fbfcad 638 if (dump_enabled_p ())
ebfd146a 639 {
78c60e3d
SS
640 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
641 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
642 }
643
644 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
97ecdb46 645 vect_mark_relevant (&worklist, phi, relevant, live_p);
ebfd146a
IR
646 }
647 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
648 {
649 stmt = gsi_stmt (si);
73fbfcad 650 if (dump_enabled_p ())
ebfd146a 651 {
78c60e3d
SS
652 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 654 }
ebfd146a
IR
655
656 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
97ecdb46 657 vect_mark_relevant (&worklist, stmt, relevant, live_p);
ebfd146a
IR
658 }
659 }
660
661 /* 2. Process_worklist */
9771b263 662 while (worklist.length () > 0)
ebfd146a
IR
663 {
664 use_operand_p use_p;
665 ssa_op_iter iter;
666
9771b263 667 stmt = worklist.pop ();
73fbfcad 668 if (dump_enabled_p ())
ebfd146a 669 {
78c60e3d
SS
670 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
672 }
673
b8698a0f 674 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
b28ead45
AH
675 (DEF_STMT) as relevant/irrelevant according to the relevance property
676 of STMT. */
ebfd146a
IR
677 stmt_vinfo = vinfo_for_stmt (stmt);
678 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
ebfd146a 679
b28ead45
AH
680 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
681 propagated as is to the DEF_STMTs of its USEs.
ebfd146a
IR
682
683 One exception is when STMT has been identified as defining a reduction
b28ead45 684 variable; in this case we set the relevance to vect_used_by_reduction.
ebfd146a 685 This is because we distinguish between two kinds of relevant stmts -
b8698a0f 686 those that are used by a reduction computation, and those that are
ff802fa1 687 (also) used by a regular computation. This allows us later on to
b8698a0f 688 identify stmts that are used solely by a reduction, and therefore the
7c5222ff 689 order of the results that they produce does not have to be kept. */
ebfd146a 690
b28ead45 691 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
ebfd146a 692 {
06066f92 693 case vect_reduction_def:
b28ead45
AH
694 gcc_assert (relevant != vect_unused_in_scope);
695 if (relevant != vect_unused_in_scope
696 && relevant != vect_used_in_scope
697 && relevant != vect_used_by_reduction
698 && relevant != vect_used_only_live)
06066f92 699 {
b28ead45
AH
700 if (dump_enabled_p ())
701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
702 "unsupported use of reduction.\n");
703 return false;
06066f92 704 }
06066f92 705 break;
b8698a0f 706
06066f92 707 case vect_nested_cycle:
b28ead45
AH
708 if (relevant != vect_unused_in_scope
709 && relevant != vect_used_in_outer_by_reduction
710 && relevant != vect_used_in_outer)
06066f92 711 {
73fbfcad 712 if (dump_enabled_p ())
78c60e3d 713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 714 "unsupported use of nested cycle.\n");
7c5222ff 715
06066f92
IR
716 return false;
717 }
b8698a0f
L
718 break;
719
06066f92 720 case vect_double_reduction_def:
b28ead45
AH
721 if (relevant != vect_unused_in_scope
722 && relevant != vect_used_by_reduction
723 && relevant != vect_used_only_live)
06066f92 724 {
73fbfcad 725 if (dump_enabled_p ())
78c60e3d 726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 727 "unsupported use of double reduction.\n");
7c5222ff 728
7c5222ff 729 return false;
06066f92 730 }
b8698a0f 731 break;
7c5222ff 732
06066f92
IR
733 default:
734 break;
7c5222ff 735 }
b8698a0f 736
aec7ae7d 737 if (is_pattern_stmt_p (stmt_vinfo))
9d5e7640
IR
738 {
739 /* Pattern statements are not inserted into the code, so
740 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
741 have to scan the RHS or function arguments instead. */
742 if (is_gimple_assign (stmt))
743 {
69d2aade
JJ
744 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
745 tree op = gimple_assign_rhs1 (stmt);
746
747 i = 1;
748 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
749 {
750 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
b28ead45 751 relevant, &worklist, false)
69d2aade 752 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
b28ead45 753 relevant, &worklist, false))
566d377a 754 return false;
69d2aade
JJ
755 i = 2;
756 }
757 for (; i < gimple_num_ops (stmt); i++)
9d5e7640 758 {
69d2aade 759 op = gimple_op (stmt, i);
afbe6325 760 if (TREE_CODE (op) == SSA_NAME
b28ead45 761 && !process_use (stmt, op, loop_vinfo, relevant,
afbe6325 762 &worklist, false))
07687835 763 return false;
9d5e7640
IR
764 }
765 }
766 else if (is_gimple_call (stmt))
767 {
768 for (i = 0; i < gimple_call_num_args (stmt); i++)
769 {
770 tree arg = gimple_call_arg (stmt, i);
b28ead45 771 if (!process_use (stmt, arg, loop_vinfo, relevant,
aec7ae7d 772 &worklist, false))
07687835 773 return false;
9d5e7640
IR
774 }
775 }
776 }
777 else
778 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
779 {
780 tree op = USE_FROM_PTR (use_p);
b28ead45 781 if (!process_use (stmt, op, loop_vinfo, relevant,
aec7ae7d 782 &worklist, false))
07687835 783 return false;
9d5e7640 784 }
aec7ae7d 785
3bab6342 786 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
aec7ae7d 787 {
134c85ca
RS
788 gather_scatter_info gs_info;
789 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
790 gcc_unreachable ();
791 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
792 &worklist, true))
566d377a 793 return false;
aec7ae7d 794 }
ebfd146a
IR
795 } /* while worklist */
796
ebfd146a
IR
797 return true;
798}
799
68435eb2
RB
800/* Compute the prologue cost for invariant or constant operands. */
801
802static unsigned
803vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
804 unsigned opno, enum vect_def_type dt,
805 stmt_vector_for_cost *cost_vec)
806{
b9787581 807 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
68435eb2
RB
808 tree op = gimple_op (stmt, opno);
809 unsigned prologue_cost = 0;
810
811 /* Without looking at the actual initializer a vector of
812 constants can be implemented as load from the constant pool.
813 When all elements are the same we can use a splat. */
814 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
815 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
816 unsigned num_vects_to_check;
817 unsigned HOST_WIDE_INT const_nunits;
818 unsigned nelt_limit;
819 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
820 && ! multiple_p (const_nunits, group_size))
821 {
822 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
823 nelt_limit = const_nunits;
824 }
825 else
826 {
827 /* If either the vector has variable length or the vectors
828 are composed of repeated whole groups we only need to
829 cost construction once. All vectors will be the same. */
830 num_vects_to_check = 1;
831 nelt_limit = group_size;
832 }
833 tree elt = NULL_TREE;
834 unsigned nelt = 0;
835 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
836 {
837 unsigned si = j % group_size;
838 if (nelt == 0)
b9787581 839 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
68435eb2
RB
840 /* ??? We're just tracking whether all operands of a single
841 vector initializer are the same, ideally we'd check if
842 we emitted the same one already. */
b9787581 843 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
68435eb2
RB
844 opno))
845 elt = NULL_TREE;
846 nelt++;
847 if (nelt == nelt_limit)
848 {
849 /* ??? We need to pass down stmt_info for a vector type
850 even if it points to the wrong stmt. */
851 prologue_cost += record_stmt_cost
852 (cost_vec, 1,
853 dt == vect_external_def
854 ? (elt ? scalar_to_vec : vec_construct)
855 : vector_load,
856 stmt_info, 0, vect_prologue);
857 nelt = 0;
858 }
859 }
860
861 return prologue_cost;
862}
ebfd146a 863
b8698a0f 864/* Function vect_model_simple_cost.
ebfd146a 865
b8698a0f 866 Models cost for simple operations, i.e. those that only emit ncopies of a
ebfd146a
IR
867 single op. Right now, this does not account for multiple insns that could
868 be generated for the single vector op. We will handle that shortly. */
869
68435eb2 870static void
b8698a0f 871vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
92345349 872 enum vect_def_type *dt,
4fc5ebf1 873 int ndts,
68435eb2
RB
874 slp_tree node,
875 stmt_vector_for_cost *cost_vec)
ebfd146a 876{
92345349 877 int inside_cost = 0, prologue_cost = 0;
ebfd146a 878
68435eb2 879 gcc_assert (cost_vec != NULL);
ebfd146a 880
68435eb2
RB
881 /* ??? Somehow we need to fix this at the callers. */
882 if (node)
883 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
884
885 if (node)
886 {
887 /* Scan operands and account for prologue cost of constants/externals.
888 ??? This over-estimates cost for multiple uses and should be
889 re-engineered. */
b9787581 890 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
68435eb2
RB
891 tree lhs = gimple_get_lhs (stmt);
892 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
893 {
894 tree op = gimple_op (stmt, i);
68435eb2
RB
895 enum vect_def_type dt;
896 if (!op || op == lhs)
897 continue;
894dd753 898 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
68435eb2
RB
899 && (dt == vect_constant_def || dt == vect_external_def))
900 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
901 i, dt, cost_vec);
902 }
903 }
904 else
905 /* Cost the "broadcast" of a scalar operand in to a vector operand.
906 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
907 cost model. */
908 for (int i = 0; i < ndts; i++)
909 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
910 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
911 stmt_info, 0, vect_prologue);
912
913 /* Adjust for two-operator SLP nodes. */
914 if (node && SLP_TREE_TWO_OPERATORS (node))
915 {
916 ncopies *= 2;
917 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
918 stmt_info, 0, vect_body);
919 }
c3e7ee41
BS
920
921 /* Pass the inside-of-loop statements to the target-specific cost model. */
68435eb2
RB
922 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
923 stmt_info, 0, vect_body);
c3e7ee41 924
73fbfcad 925 if (dump_enabled_p ())
78c60e3d
SS
926 dump_printf_loc (MSG_NOTE, vect_location,
927 "vect_model_simple_cost: inside_cost = %d, "
e645e942 928 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
929}
930
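/* As an example of the non-SLP path above: a single-copy vector statement
   with one constant or external operand is costed as one scalar_to_vec in
   the prologue (the broadcast) plus one vector_stmt in the loop body.  */
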
931
8bd37302
BS
932/* Model cost for type demotion and promotion operations. PWR is normally
933 zero for single-step promotions and demotions. It will be one if
934 two-step promotion/demotion is required, and so on. Each additional
935 step doubles the number of instructions required. */
936
937static void
938vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
68435eb2
RB
939 enum vect_def_type *dt, int pwr,
940 stmt_vector_for_cost *cost_vec)
8bd37302
BS
941{
942 int i, tmp;
92345349 943 int inside_cost = 0, prologue_cost = 0;
c3e7ee41 944
8bd37302
BS
945 for (i = 0; i < pwr + 1; i++)
946 {
947 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
948 (i + 1) : i;
68435eb2
RB
949 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
950 vec_promote_demote, stmt_info, 0,
951 vect_body);
8bd37302
BS
952 }
953
954 /* FORNOW: Assuming maximum 2 args per stmts. */
955 for (i = 0; i < 2; i++)
92345349 956 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
68435eb2
RB
957 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
958 stmt_info, 0, vect_prologue);
8bd37302 959
73fbfcad 960 if (dump_enabled_p ())
78c60e3d
SS
961 dump_printf_loc (MSG_NOTE, vect_location,
962 "vect_model_promotion_demotion_cost: inside_cost = %d, "
e645e942 963 "prologue_cost = %d .\n", inside_cost, prologue_cost);
8bd37302
BS
964}
965
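/* For example, a two-step demotion (PWR == 1) records
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3 vec_promote_demote operations
   in the loop body, while a two-step promotion (where TMP is I + 1)
   records 2 + 4 = 6.  */
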
ebfd146a
IR
966/* Function vect_model_store_cost
967
0d0293ac
MM
968 Models cost for stores. In the case of grouped accesses, one access
969 has the overhead of the grouped access attributed to it. */
ebfd146a 970
68435eb2 971static void
b8698a0f 972vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
68435eb2 973 enum vect_def_type dt,
2de001ee 974 vect_memory_access_type memory_access_type,
9ce4345a 975 vec_load_store_type vls_type, slp_tree slp_node,
68435eb2 976 stmt_vector_for_cost *cost_vec)
ebfd146a 977{
92345349 978 unsigned int inside_cost = 0, prologue_cost = 0;
bffb8014 979 stmt_vec_info first_stmt_info = stmt_info;
892a981f 980 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 981
68435eb2
RB
982 /* ??? Somehow we need to fix this at the callers. */
983 if (slp_node)
984 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
985
9ce4345a 986 if (vls_type == VLS_STORE_INVARIANT)
68435eb2
RB
987 {
988 if (slp_node)
989 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
990 1, dt, cost_vec);
991 else
992 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
993 stmt_info, 0, vect_prologue);
994 }
ebfd146a 995
892a981f
RS
996 /* Grouped stores update all elements in the group at once,
997 so we want the DR for the first statement. */
998 if (!slp_node && grouped_access_p)
bffb8014 999 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 1000
892a981f
RS
1001 /* True if we should include any once-per-group costs as well as
1002 the cost of the statement itself. For SLP we only get called
1003 once per group anyhow. */
bffb8014 1004 bool first_stmt_p = (first_stmt_info == stmt_info);
892a981f 1005
272c6793 1006 /* We assume that the cost of a single store-lanes instruction is
2c53b149 1007 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
272c6793 1008 access is instead being provided by a permute-and-store operation,
2de001ee
RS
1009 include the cost of the permutes. */
1010 if (first_stmt_p
1011 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 1012 {
e1377713
ES
1013 /* Uses a high and low interleave or shuffle operations for each
1014 needed permute. */
bffb8014 1015 int group_size = DR_GROUP_SIZE (first_stmt_info);
e1377713 1016 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
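 /* For instance, with group_size = 4 and ncopies = 2 this costs
 2 * ceil_log2 (4) * 4 = 16 vec_perm statements. */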
68435eb2 1017 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
92345349 1018 stmt_info, 0, vect_body);
ebfd146a 1019
73fbfcad 1020 if (dump_enabled_p ())
78c60e3d 1021 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1022 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 1023 group_size);
ebfd146a
IR
1024 }
1025
cee62fee 1026 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 1027 /* Costs of the stores. */
067bc855
RB
1028 if (memory_access_type == VMAT_ELEMENTWISE
1029 || memory_access_type == VMAT_GATHER_SCATTER)
c5126ce8
RS
1030 {
1031 /* N scalar stores plus extracting the elements. */
1032 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
68435eb2 1033 inside_cost += record_stmt_cost (cost_vec,
c5126ce8
RS
1034 ncopies * assumed_nunits,
1035 scalar_store, stmt_info, 0, vect_body);
1036 }
f2e2a985 1037 else
57c454d2 1038 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
ebfd146a 1039
2de001ee
RS
1040 if (memory_access_type == VMAT_ELEMENTWISE
1041 || memory_access_type == VMAT_STRIDED_SLP)
c5126ce8
RS
1042 {
1043 /* N scalar stores plus extracting the elements. */
1044 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
68435eb2 1045 inside_cost += record_stmt_cost (cost_vec,
c5126ce8
RS
1046 ncopies * assumed_nunits,
1047 vec_to_scalar, stmt_info, 0, vect_body);
1048 }
cee62fee 1049
73fbfcad 1050 if (dump_enabled_p ())
78c60e3d
SS
1051 dump_printf_loc (MSG_NOTE, vect_location,
1052 "vect_model_store_cost: inside_cost = %d, "
e645e942 1053 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
1054}
1055
1056
720f5239
IR
1057/* Calculate cost of DR's memory access. */
1058void
57c454d2 1059vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
c3e7ee41 1060 unsigned int *inside_cost,
92345349 1061 stmt_vector_for_cost *body_cost_vec)
720f5239 1062{
57c454d2 1063 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
720f5239
IR
1064 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1065
1066 switch (alignment_support_scheme)
1067 {
1068 case dr_aligned:
1069 {
92345349
BS
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 vector_store, stmt_info, 0,
1072 vect_body);
720f5239 1073
73fbfcad 1074 if (dump_enabled_p ())
78c60e3d 1075 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1076 "vect_model_store_cost: aligned.\n");
720f5239
IR
1077 break;
1078 }
1079
1080 case dr_unaligned_supported:
1081 {
720f5239 1082 /* Here, we assign an additional cost for the unaligned store. */
92345349 1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1084 unaligned_store, stmt_info,
92345349 1085 DR_MISALIGNMENT (dr), vect_body);
73fbfcad 1086 if (dump_enabled_p ())
78c60e3d
SS
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: unaligned supported by "
e645e942 1089 "hardware.\n");
720f5239
IR
1090 break;
1091 }
1092
38eec4c6
UW
1093 case dr_unaligned_unsupported:
1094 {
1095 *inside_cost = VECT_MAX_COST;
1096
73fbfcad 1097 if (dump_enabled_p ())
78c60e3d 1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1099 "vect_model_store_cost: unsupported access.\n");
38eec4c6
UW
1100 break;
1101 }
1102
720f5239
IR
1103 default:
1104 gcc_unreachable ();
1105 }
1106}
1107
1108
ebfd146a
IR
1109/* Function vect_model_load_cost
1110
892a981f
RS
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
b8698a0f 1113 accesses are supported for loads, we also account for the costs of the
ebfd146a
IR
1114 access scheme chosen. */
1115
68435eb2
RB
1116static void
1117vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
2de001ee 1118 vect_memory_access_type memory_access_type,
68435eb2 1119 slp_instance instance,
2de001ee 1120 slp_tree slp_node,
68435eb2 1121 stmt_vector_for_cost *cost_vec)
ebfd146a 1122{
92345349 1123 unsigned int inside_cost = 0, prologue_cost = 0;
892a981f 1124 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
ebfd146a 1125
68435eb2
RB
1126 gcc_assert (cost_vec);
1127
1128 /* ??? Somehow we need to fix this at the callers. */
1129 if (slp_node)
1130 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1131
1132 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1133 {
1134 /* If the load is permuted then the alignment is determined by
1135 the first group element not by the first scalar stmt DR. */
bffb8014 1136 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
68435eb2
RB
1137 /* Record the cost for the permutation. */
1138 unsigned n_perms;
1139 unsigned assumed_nunits
bffb8014 1140 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
68435eb2
RB
1141 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1142 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1143 slp_vf, instance, true,
1144 &n_perms);
1145 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
bffb8014 1146 first_stmt_info, 0, vect_body);
68435eb2
RB
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
bffb8014 1149 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
68435eb2
RB
1150 bitmap_clear (perm);
1151 for (unsigned i = 0;
1152 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1153 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1154 ncopies = 0;
1155 bool load_seen = false;
bffb8014 1156 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
68435eb2
RB
1157 {
1158 if (i % assumed_nunits == 0)
1159 {
1160 if (load_seen)
1161 ncopies++;
1162 load_seen = false;
1163 }
1164 if (bitmap_bit_p (perm, i))
1165 load_seen = true;
1166 }
1167 if (load_seen)
1168 ncopies++;
1169 gcc_assert (ncopies
bffb8014
RS
1170 <= (DR_GROUP_SIZE (first_stmt_info)
1171 - DR_GROUP_GAP (first_stmt_info)
68435eb2
RB
1172 + assumed_nunits - 1) / assumed_nunits);
1173 }
1174
892a981f
RS
1175 /* Grouped loads read all elements in the group at once,
1176 so we want the DR for the first statement. */
bffb8014 1177 stmt_vec_info first_stmt_info = stmt_info;
892a981f 1178 if (!slp_node && grouped_access_p)
bffb8014 1179 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
ebfd146a 1180
892a981f
RS
1181 /* True if we should include any once-per-group costs as well as
1182 the cost of the statement itself. For SLP we only get called
1183 once per group anyhow. */
bffb8014 1184 bool first_stmt_p = (first_stmt_info == stmt_info);
892a981f 1185
272c6793 1186 /* We assume that the cost of a single load-lanes instruction is
2c53b149 1187 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
272c6793 1188 access is instead being provided by a load-and-permute operation,
2de001ee
RS
1189 include the cost of the permutes. */
1190 if (first_stmt_p
1191 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
ebfd146a 1192 {
2c23db6d
ES
1193 /* Uses an even and odd extract operations or shuffle operations
1194 for each needed permute. */
bffb8014 1195 int group_size = DR_GROUP_SIZE (first_stmt_info);
2c23db6d 1196 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
68435eb2
RB
1197 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1198 stmt_info, 0, vect_body);
ebfd146a 1199
73fbfcad 1200 if (dump_enabled_p ())
e645e942
TJ
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: strided group_size = %d .\n",
78c60e3d 1203 group_size);
ebfd146a
IR
1204 }
1205
1206 /* The loads themselves. */
067bc855
RB
1207 if (memory_access_type == VMAT_ELEMENTWISE
1208 || memory_access_type == VMAT_GATHER_SCATTER)
a82960aa 1209 {
a21892ad
BS
1210 /* N scalar loads plus gathering them into a vector. */
1211 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
c5126ce8 1212 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
68435eb2 1213 inside_cost += record_stmt_cost (cost_vec,
c5126ce8 1214 ncopies * assumed_nunits,
92345349 1215 scalar_load, stmt_info, 0, vect_body);
a82960aa
RG
1216 }
1217 else
57c454d2 1218 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
92345349 1219 &inside_cost, &prologue_cost,
68435eb2 1220 cost_vec, cost_vec, true);
2de001ee
RS
1221 if (memory_access_type == VMAT_ELEMENTWISE
1222 || memory_access_type == VMAT_STRIDED_SLP)
68435eb2 1223 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
892a981f 1224 stmt_info, 0, vect_body);
720f5239 1225
73fbfcad 1226 if (dump_enabled_p ())
78c60e3d
SS
1227 dump_printf_loc (MSG_NOTE, vect_location,
1228 "vect_model_load_cost: inside_cost = %d, "
e645e942 1229 "prologue_cost = %d .\n", inside_cost, prologue_cost);
720f5239
IR
1230}
1231
1232
1233/* Calculate cost of DR's memory access. */
1234void
57c454d2 1235vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
c3e7ee41 1236 bool add_realign_cost, unsigned int *inside_cost,
92345349
BS
1237 unsigned int *prologue_cost,
1238 stmt_vector_for_cost *prologue_cost_vec,
1239 stmt_vector_for_cost *body_cost_vec,
1240 bool record_prologue_costs)
720f5239 1241{
57c454d2 1242 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
720f5239
IR
1243 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1244
1245 switch (alignment_support_scheme)
ebfd146a
IR
1246 {
1247 case dr_aligned:
1248 {
92345349
BS
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1250 stmt_info, 0, vect_body);
ebfd146a 1251
73fbfcad 1252 if (dump_enabled_p ())
78c60e3d 1253 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1254 "vect_model_load_cost: aligned.\n");
ebfd146a
IR
1255
1256 break;
1257 }
1258 case dr_unaligned_supported:
1259 {
720f5239 1260 /* Here, we assign an additional cost for the unaligned load. */
92345349 1261 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1262 unaligned_load, stmt_info,
92345349 1263 DR_MISALIGNMENT (dr), vect_body);
c3e7ee41 1264
73fbfcad 1265 if (dump_enabled_p ())
78c60e3d
SS
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "vect_model_load_cost: unaligned supported by "
e645e942 1268 "hardware.\n");
ebfd146a
IR
1269
1270 break;
1271 }
1272 case dr_explicit_realign:
1273 {
92345349
BS
1274 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1275 vector_load, stmt_info, 0, vect_body);
1276 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1277 vec_perm, stmt_info, 0, vect_body);
ebfd146a
IR
1278
1279 /* FIXME: If the misalignment remains fixed across the iterations of
1280 the containing loop, the following cost should be added to the
92345349 1281 prologue costs. */
ebfd146a 1282 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1283 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1284 stmt_info, 0, vect_body);
ebfd146a 1285
73fbfcad 1286 if (dump_enabled_p ())
e645e942
TJ
1287 dump_printf_loc (MSG_NOTE, vect_location,
1288 "vect_model_load_cost: explicit realign\n");
8bd37302 1289
ebfd146a
IR
1290 break;
1291 }
1292 case dr_explicit_realign_optimized:
1293 {
73fbfcad 1294 if (dump_enabled_p ())
e645e942 1295 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1296 "vect_model_load_cost: unaligned software "
e645e942 1297 "pipelined.\n");
ebfd146a
IR
1298
1299 /* Unaligned software pipeline has a load of an address, an initial
ff802fa1 1300 load, and possibly a mask operation to "prime" the loop. However,
0d0293ac 1301 if this is an access in a group of loads, which provide grouped
ebfd146a 1302 access, then the above cost should only be considered for one
ff802fa1 1303 access in the group. Inside the loop, there is a load op
ebfd146a
IR
1304 and a realignment op. */
1305
92345349 1306 if (add_realign_cost && record_prologue_costs)
ebfd146a 1307 {
92345349
BS
1308 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1309 vector_stmt, stmt_info,
1310 0, vect_prologue);
ebfd146a 1311 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1312 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1313 vector_stmt, stmt_info,
1314 0, vect_prologue);
ebfd146a
IR
1315 }
1316
92345349
BS
1317 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1318 stmt_info, 0, vect_body);
1319 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1320 stmt_info, 0, vect_body);
8bd37302 1321
73fbfcad 1322 if (dump_enabled_p ())
78c60e3d 1323 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
1324 "vect_model_load_cost: explicit realign optimized"
1325 "\n");
8bd37302 1326
ebfd146a
IR
1327 break;
1328 }
1329
38eec4c6
UW
1330 case dr_unaligned_unsupported:
1331 {
1332 *inside_cost = VECT_MAX_COST;
1333
73fbfcad 1334 if (dump_enabled_p ())
78c60e3d 1335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1336 "vect_model_load_cost: unsupported access.\n");
38eec4c6
UW
1337 break;
1338 }
1339
ebfd146a
IR
1340 default:
1341 gcc_unreachable ();
1342 }
ebfd146a
IR
1343}
1344
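/* To summarize the cases above: dr_aligned costs NCOPIES vector_load
   operations; dr_unaligned_supported costs NCOPIES unaligned_load
   operations; dr_explicit_realign costs two vector_load operations and one
   vec_perm per copy (plus one vector_stmt if the target provides
   builtin_mask_for_load); dr_explicit_realign_optimized costs one
   vector_load and one vec_perm per copy in the body and, when
   ADD_REALIGN_COST and RECORD_PROLOGUE_COSTS are set, the address and mask
   setup as vector_stmt operations in the prologue.  */
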
418b7df3
RG
1345/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1346 the loop preheader for the vectorized stmt STMT. */
ebfd146a 1347
418b7df3 1348static void
355fe088 1349vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
ebfd146a 1350{
ebfd146a 1351 if (gsi)
418b7df3 1352 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1353 else
1354 {
418b7df3 1355 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a 1356 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
b8698a0f 1357
a70d6342
IR
1358 if (loop_vinfo)
1359 {
1360 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
418b7df3
RG
1361 basic_block new_bb;
1362 edge pe;
a70d6342
IR
1363
1364 if (nested_in_vect_loop_p (loop, stmt))
1365 loop = loop->inner;
b8698a0f 1366
a70d6342 1367 pe = loop_preheader_edge (loop);
418b7df3 1368 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
a70d6342
IR
1369 gcc_assert (!new_bb);
1370 }
1371 else
1372 {
1373 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1374 basic_block bb;
1375 gimple_stmt_iterator gsi_bb_start;
1376
1377 gcc_assert (bb_vinfo);
1378 bb = BB_VINFO_BB (bb_vinfo);
12aaf609 1379 gsi_bb_start = gsi_after_labels (bb);
418b7df3 1380 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
a70d6342 1381 }
ebfd146a
IR
1382 }
1383
73fbfcad 1384 if (dump_enabled_p ())
ebfd146a 1385 {
78c60e3d
SS
1386 dump_printf_loc (MSG_NOTE, vect_location,
1387 "created new init_stmt: ");
1388 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
ebfd146a 1389 }
418b7df3
RG
1390}
1391
1392/* Function vect_init_vector.
ebfd146a 1393
5467ee52
RG
1394 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1395 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1396 vector type a vector with all elements equal to VAL is created first.
1397 Place the initialization at BSI if it is not NULL. Otherwise, place the
1398 initialization at the loop preheader.
418b7df3
RG
1399 Return the DEF of INIT_STMT.
1400 It will be used in the vectorization of STMT. */
1401
1402tree
355fe088 1403vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
418b7df3 1404{
355fe088 1405 gimple *init_stmt;
418b7df3
RG
1406 tree new_temp;
1407
e412ece4
RB
1408 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1409 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
418b7df3 1410 {
e412ece4
RB
1411 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1412 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
418b7df3 1413 {
5a308cf1
IE
1414 /* Scalar boolean value should be transformed into
1415 all zeros or all ones value before building a vector. */
1416 if (VECTOR_BOOLEAN_TYPE_P (type))
1417 {
b3d51f23
IE
1418 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1419 tree false_val = build_zero_cst (TREE_TYPE (type));
5a308cf1
IE
1420
1421 if (CONSTANT_CLASS_P (val))
1422 val = integer_zerop (val) ? false_val : true_val;
1423 else
1424 {
1425 new_temp = make_ssa_name (TREE_TYPE (type));
1426 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1427 val, true_val, false_val);
1428 vect_init_vector_1 (stmt, init_stmt, gsi);
1429 val = new_temp;
1430 }
1431 }
1432 else if (CONSTANT_CLASS_P (val))
42fd8198 1433 val = fold_convert (TREE_TYPE (type), val);
418b7df3
RG
1434 else
1435 {
b731b390 1436 new_temp = make_ssa_name (TREE_TYPE (type));
e412ece4
RB
1437 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1438 init_stmt = gimple_build_assign (new_temp,
1439 fold_build1 (VIEW_CONVERT_EXPR,
1440 TREE_TYPE (type),
1441 val));
1442 else
1443 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
418b7df3 1444 vect_init_vector_1 (stmt, init_stmt, gsi);
5467ee52 1445 val = new_temp;
418b7df3
RG
1446 }
1447 }
5467ee52 1448 val = build_vector_from_val (type, val);
418b7df3
RG
1449 }
1450
0e22bb5a
RB
1451 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1452 init_stmt = gimple_build_assign (new_temp, val);
418b7df3 1453 vect_init_vector_1 (stmt, init_stmt, gsi);
0e22bb5a 1454 return new_temp;
ebfd146a
IR
1455}
1456
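/* An illustrative call (not tied to a particular caller):

     tree zero = vect_init_vector (stmt, build_zero_cst (TREE_TYPE (vectype)),
				   vectype, NULL);

   builds a vector of zeros of type VECTYPE, inserts the init_stmt on the
   loop preheader edge because GSI is NULL (or after the labels of the
   region's first block for basic-block vectorization), and returns the new
   SSA name.  */
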
c83a894c 1457/* Function vect_get_vec_def_for_operand_1.
a70d6342 1458
c83a894c
AH
1459 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1460 DT that will be used in the vectorized stmt. */
ebfd146a
IR
1461
1462tree
c83a894c 1463vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
ebfd146a
IR
1464{
1465 tree vec_oprnd;
1eede195 1466 stmt_vec_info vec_stmt_info;
ebfd146a 1467 stmt_vec_info def_stmt_info = NULL;
ebfd146a
IR
1468
1469 switch (dt)
1470 {
81c40241 1471 /* operand is a constant or a loop invariant. */
ebfd146a 1472 case vect_constant_def:
81c40241 1473 case vect_external_def:
c83a894c
AH
1474 /* Code should use vect_get_vec_def_for_operand. */
1475 gcc_unreachable ();
ebfd146a 1476
81c40241 1477 /* operand is defined inside the loop. */
8644a673 1478 case vect_internal_def:
ebfd146a 1479 {
ebfd146a
IR
1480 /* Get the def from the vectorized stmt. */
1481 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1482
1eede195
RS
1483 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1484 /* Get vectorized pattern statement. */
1485 if (!vec_stmt_info
1486 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1487 && !STMT_VINFO_RELEVANT (def_stmt_info))
1488 vec_stmt_info = (STMT_VINFO_VEC_STMT
1489 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1490 gcc_assert (vec_stmt_info);
1491 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1492 vec_oprnd = PHI_RESULT (phi);
ebfd146a 1493 else
1eede195
RS
1494 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1495 return vec_oprnd;
ebfd146a
IR
1496 }
1497
c78e3652 1498 /* operand is defined by a loop header phi. */
ebfd146a 1499 case vect_reduction_def:
06066f92 1500 case vect_double_reduction_def:
7c5222ff 1501 case vect_nested_cycle:
ebfd146a
IR
1502 case vect_induction_def:
1503 {
1504 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1505
1eede195
RS
1506 /* Get the def from the vectorized stmt. */
1507 def_stmt_info = vinfo_for_stmt (def_stmt);
1508 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1509 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1510 vec_oprnd = PHI_RESULT (phi);
6dbbece6 1511 else
1eede195
RS
1512 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1513 return vec_oprnd;
ebfd146a
IR
1514 }
1515
1516 default:
1517 gcc_unreachable ();
1518 }
1519}
1520
1521
c83a894c
AH
1522/* Function vect_get_vec_def_for_operand.
1523
1524 OP is an operand in STMT. This function returns a (vector) def that will be
1525 used in the vectorized stmt for STMT.
1526
1527 In the case that OP is an SSA_NAME which is defined in the loop, then
1528 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1529
1530 In case OP is an invariant or constant, a new stmt that creates a vector def
1531 needs to be introduced. VECTYPE may be used to specify a required type for
1532 the vector invariant. */
1533
1534tree
1535vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1536{
1537 gimple *def_stmt;
1538 enum vect_def_type dt;
1539 bool is_simple_use;
1540 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1541 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1542
1543 if (dump_enabled_p ())
1544 {
1545 dump_printf_loc (MSG_NOTE, vect_location,
1546 "vect_get_vec_def_for_operand: ");
1547 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1548 dump_printf (MSG_NOTE, "\n");
1549 }
1550
fef96d8e
RS
1551 stmt_vec_info def_stmt_info;
1552 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1553 &def_stmt_info, &def_stmt);
c83a894c
AH
1554 gcc_assert (is_simple_use);
1555 if (def_stmt && dump_enabled_p ())
1556 {
1557 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1558 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1559 }
1560
1561 if (dt == vect_constant_def || dt == vect_external_def)
1562 {
1563 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1564 tree vector_type;
1565
1566 if (vectype)
1567 vector_type = vectype;
2568d8a1 1568 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1569 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1570 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1571 else
1572 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1573
1574 gcc_assert (vector_type);
1575 return vect_init_vector (stmt, op, vector_type, NULL);
1576 }
1577 else
fef96d8e 1578 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
c83a894c
AH
1579}
1580
1581
ebfd146a
IR
1582/* Function vect_get_vec_def_for_stmt_copy
1583
ff802fa1 1584 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1585 vectorized stmt to be created (by the caller to this function) is a "copy"
1586 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1587 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1588 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1589 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1590 DT is the type of the vector def VEC_OPRND.
1591
1592 Context:
1593 In case the vectorization factor (VF) is bigger than the number
1594 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1595 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1596 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1597 smallest data-type determines the VF, and as a result, when vectorizing
1598 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1599 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1600 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1601 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1602 which VF=16 and nunits=4, so the number of copies required is 4):
1603
1604 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1605
ebfd146a
IR
1606 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1607 VS1.1: vx.1 = memref1 VS1.2
1608 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1609 VS1.3: vx.3 = memref3
ebfd146a
IR
1610
1611 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1612 VSnew.1: vz1 = vx.1 + ... VSnew.2
1613 VSnew.2: vz2 = vx.2 + ... VSnew.3
1614 VSnew.3: vz3 = vx.3 + ...
1615
1616 The vectorization of S1 is explained in vectorizable_load.
1617 The vectorization of S2:
b8698a0f
L
1618 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1619 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1620 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1621 returns the vector-def 'vx.0'.
1622
b8698a0f
L
1623 To create the remaining copies of the vector-stmt (VSnew.j), this
1624 function is called to get the relevant vector-def for each operand. It is
1625 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1626 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1627
b8698a0f
L
1628 For example, to obtain the vector-def 'vx.1' in order to create the
1629 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1630 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1631 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1632 and return its def ('vx.1').
1633 Overall, to create the above sequence this function will be called 3 times:
1634 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1635 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1636 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1637
1638tree
1639vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1640{
355fe088 1641 gimple *vec_stmt_for_operand;
ebfd146a
IR
1642 stmt_vec_info def_stmt_info;
1643
1644 /* Do nothing; can reuse same def. */
8644a673 1645 if (dt == vect_external_def || dt == vect_constant_def )
ebfd146a
IR
1646 return vec_oprnd;
1647
1648 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1649 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1650 gcc_assert (def_stmt_info);
1651 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1652 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1653 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1654 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1655 else
1656 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1657 return vec_oprnd;
1658}
1659
1660
1661/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1662 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1663
c78e3652 1664void
b8698a0f 1665vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1666 vec<tree> *vec_oprnds0,
1667 vec<tree> *vec_oprnds1)
ebfd146a 1668{
9771b263 1669 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1670
1671 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1672 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1673
9771b263 1674 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1675 {
9771b263 1676 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1677 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1678 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1679 }
1680}
1681
1682
c78e3652 1683/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1684
c78e3652 1685void
355fe088 1686vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1687 vec<tree> *vec_oprnds0,
1688 vec<tree> *vec_oprnds1,
306b0c92 1689 slp_tree slp_node)
ebfd146a
IR
1690{
1691 if (slp_node)
d092494c
IR
1692 {
1693 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1694 auto_vec<tree> ops (nops);
1695 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1696
9771b263 1697 ops.quick_push (op0);
d092494c 1698 if (op1)
9771b263 1699 ops.quick_push (op1);
d092494c 1700
306b0c92 1701 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1702
37b5ec8f 1703 *vec_oprnds0 = vec_defs[0];
d092494c 1704 if (op1)
37b5ec8f 1705 *vec_oprnds1 = vec_defs[1];
d092494c 1706 }
ebfd146a
IR
1707 else
1708 {
1709 tree vec_oprnd;
1710
9771b263 1711 vec_oprnds0->create (1);
81c40241 1712 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1713 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1714
1715 if (op1)
1716 {
9771b263 1717 vec_oprnds1->create (1);
81c40241 1718 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1719 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1720 }
1721 }
1722}
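
/* Illustrative usage note (added, not from the original sources): a
   typical caller loops over the NCOPIES copies of a vector statement and
   only uses vect_get_vec_defs for the first one, e.g.

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                              slp_node);
         else
           vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
         /* ... build the j'th vector statement from the defs ...  */
       }

   The variable names are made up for the example; vectorizable_bswap
   below follows this pattern with a single operand.  */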
1723
bb6c2b68
RS
1724/* Helper function called by vect_finish_replace_stmt and
1725 vect_finish_stmt_generation. Set the location of the new
e1bd7296 1726 statement and create and return a stmt_vec_info for it. */
bb6c2b68 1727
e1bd7296 1728static stmt_vec_info
bb6c2b68
RS
1729vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1730{
1731 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1732 vec_info *vinfo = stmt_info->vinfo;
1733
e1bd7296 1734 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
bb6c2b68
RS
1735
1736 if (dump_enabled_p ())
1737 {
1738 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1739 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1740 }
1741
1742 gimple_set_location (vec_stmt, gimple_location (stmt));
1743
1744 /* While EH edges will generally prevent vectorization, stmt might
1745 e.g. be in a must-not-throw region. Ensure newly created stmts
1746 that could throw are part of the same region. */
1747 int lp_nr = lookup_stmt_eh_lp (stmt);
1748 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1749 add_stmt_to_eh_lp (vec_stmt, lp_nr);
e1bd7296
RS
1750
1751 return vec_stmt_info;
bb6c2b68
RS
1752}
1753
1754/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
e1bd7296
RS
1755 which sets the same scalar result as STMT did. Create and return a
1756 stmt_vec_info for VEC_STMT. */
bb6c2b68 1757
e1bd7296 1758stmt_vec_info
bb6c2b68
RS
1759vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1760{
1761 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1762
1763 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1764 gsi_replace (&gsi, vec_stmt, false);
1765
e1bd7296 1766 return vect_finish_stmt_generation_1 (stmt, vec_stmt);
bb6c2b68 1767}
ebfd146a 1768
e1bd7296
RS
1769/* Add VEC_STMT to the vectorized implementation of STMT and insert it
1770 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
ebfd146a 1771
e1bd7296 1772stmt_vec_info
355fe088 1773vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1774 gimple_stmt_iterator *gsi)
1775{
ebfd146a
IR
1776 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1777
54e8e2c3
RG
1778 if (!gsi_end_p (*gsi)
1779 && gimple_has_mem_ops (vec_stmt))
1780 {
355fe088 1781 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1782 tree vuse = gimple_vuse (at_stmt);
1783 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1784 {
1785 tree vdef = gimple_vdef (at_stmt);
1786 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1787 /* If we have an SSA vuse and insert a store, update virtual
1788 SSA form to avoid triggering the renamer. Do so only
1789 if we can easily see all uses - which is what almost always
1790 happens with the way vectorized stmts are inserted. */
1791 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1792 && ((is_gimple_assign (vec_stmt)
1793 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1794 || (is_gimple_call (vec_stmt)
1795 && !(gimple_call_flags (vec_stmt)
1796 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1797 {
1798 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1799 gimple_set_vdef (vec_stmt, new_vdef);
1800 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1801 }
1802 }
1803 }
ebfd146a 1804 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
e1bd7296 1805 return vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1806}
1807
70439f0d
RS
1808/* We want to vectorize a call to combined function CFN with function
1809 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1810 as the types of all inputs. Check whether this is possible using
1811 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1812
70439f0d
RS
1813static internal_fn
1814vectorizable_internal_function (combined_fn cfn, tree fndecl,
1815 tree vectype_out, tree vectype_in)
ebfd146a 1816{
70439f0d
RS
1817 internal_fn ifn;
1818 if (internal_fn_p (cfn))
1819 ifn = as_internal_fn (cfn);
1820 else
1821 ifn = associated_internal_fn (fndecl);
1822 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1823 {
1824 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1825 if (info.vectorizable)
1826 {
1827 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1828 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1829 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1830 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1831 return ifn;
1832 }
1833 }
1834 return IFN_LAST;
ebfd146a
IR
1835}
1836
5ce9450f 1837
355fe088 1838static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1839 gimple_stmt_iterator *);
1840
7cfb4d93
RS
1841/* Check whether a load or store statement in the loop described by
1842 LOOP_VINFO is possible in a fully-masked loop. This is testing
1843 whether the vectorizer pass has the appropriate support, as well as
1844 whether the target does.
1845
1846 VLS_TYPE says whether the statement is a load or store and VECTYPE
1847 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1848 says how the load or store is going to be implemented and GROUP_SIZE
1849 is the number of load or store statements in the containing group.
bfaa08b7
RS
1850 If the access is a gather load or scatter store, GS_INFO describes
1851 its arguments.
7cfb4d93
RS
1852
1853 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1854 supported, otherwise record the required mask types. */
1855
1856static void
1857check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1858 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1859 vect_memory_access_type memory_access_type,
1860 gather_scatter_info *gs_info)
7cfb4d93
RS
1861{
1862 /* Invariant loads need no special support. */
1863 if (memory_access_type == VMAT_INVARIANT)
1864 return;
1865
1866 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1867 machine_mode vecmode = TYPE_MODE (vectype);
1868 bool is_load = (vls_type == VLS_LOAD);
1869 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1870 {
1871 if (is_load
1872 ? !vect_load_lanes_supported (vectype, group_size, true)
1873 : !vect_store_lanes_supported (vectype, group_size, true))
1874 {
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1877 "can't use a fully-masked loop because the"
1878 " target doesn't have an appropriate masked"
1879 " load/store-lanes instruction.\n");
1880 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1881 return;
1882 }
1883 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1884 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1885 return;
1886 }
1887
bfaa08b7
RS
1888 if (memory_access_type == VMAT_GATHER_SCATTER)
1889 {
f307441a
RS
1890 internal_fn ifn = (is_load
1891 ? IFN_MASK_GATHER_LOAD
1892 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1893 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1894 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1895 gs_info->memory_type,
1896 TYPE_SIGN (offset_type),
1897 gs_info->scale))
1898 {
1899 if (dump_enabled_p ())
1900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1901 "can't use a fully-masked loop because the"
1902 " target doesn't have an appropriate masked"
f307441a 1903 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1904 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1905 return;
1906 }
1907 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1908 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1909 return;
1910 }
1911
7cfb4d93
RS
1912 if (memory_access_type != VMAT_CONTIGUOUS
1913 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1914 {
1915 /* Element X of the data must come from iteration i * VF + X of the
1916 scalar loop. We need more work to support other mappings. */
1917 if (dump_enabled_p ())
1918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1919 "can't use a fully-masked loop because an access"
1920 " isn't contiguous.\n");
1921 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1922 return;
1923 }
1924
1925 machine_mode mask_mode;
1926 if (!(targetm.vectorize.get_mask_mode
1927 (GET_MODE_NUNITS (vecmode),
1928 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1929 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1930 {
1931 if (dump_enabled_p ())
1932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1933 "can't use a fully-masked loop because the target"
1934 " doesn't have the appropriate masked load or"
1935 " store.\n");
1936 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1937 return;
1938 }
1939 /* We might load more scalars than we need for permuting SLP loads.
1940 We checked in get_group_load_store_type that the extra elements
1941 don't leak into a new vector. */
1942 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1943 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1944 unsigned int nvectors;
1945 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1946 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1947 else
1948 gcc_unreachable ();
1949}
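
/* Worked example for the final mask computation above (illustrative
   numbers, not from the original sources): with GROUP_SIZE = 3, a
   vectorization factor VF = 4 and NUNITS = 8 elements per vector, the
   group accesses 3 * 4 = 12 scalars per iteration, so
   can_div_away_from_zero_p rounds 12 / 8 up to NVECTORS = 2 and two
   loop masks of type VECTYPE are recorded.  */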
1950
1951/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1952 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1953 that needs to be applied to all loads and stores in a vectorized loop.
1954 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1955
1956 MASK_TYPE is the type of both masks. If new statements are needed,
1957 insert them before GSI. */
1958
1959static tree
1960prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1961 gimple_stmt_iterator *gsi)
1962{
1963 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1964 if (!loop_mask)
1965 return vec_mask;
1966
1967 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1968 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1969 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1970 vec_mask, loop_mask);
1971 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1972 return and_res;
1973}
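
/* For illustration (added, not from the original sources): when a loop
   mask is present the function emits a single statement of the form

     vec_mask_and_42 = vec_mask_31 & loop_mask_17;

   (the SSA names are made up) and the caller then uses the result as the
   mask operand of the masked load/store or gather/scatter call.  */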
1974
429ef523
RS
1975/* Determine whether we can use a gather load or scatter store to vectorize
1976 strided load or store STMT by truncating the current offset to a smaller
1977 width. We need to be able to construct an offset vector:
1978
1979 { 0, X, X*2, X*3, ... }
1980
1981 without loss of precision, where X is STMT's DR_STEP.
1982
1983 Return true if this is possible, describing the gather load or scatter
1984 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1985
1986static bool
1987vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1988 bool masked_p,
1989 gather_scatter_info *gs_info)
1990{
1991 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1992 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1993 tree step = DR_STEP (dr);
1994 if (TREE_CODE (step) != INTEGER_CST)
1995 {
1996 /* ??? Perhaps we could use range information here? */
1997 if (dump_enabled_p ())
1998 dump_printf_loc (MSG_NOTE, vect_location,
1999 "cannot truncate variable step.\n");
2000 return false;
2001 }
2002
2003 /* Get the number of bits in an element. */
2004 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2005 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2006 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2007
2008 /* Set COUNT to the upper limit on the number of elements - 1.
2009 Start with the maximum vectorization factor. */
2010 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2011
2012 /* Try lowering COUNT to the number of scalar latch iterations. */
2013 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2014 widest_int max_iters;
2015 if (max_loop_iterations (loop, &max_iters)
2016 && max_iters < count)
2017 count = max_iters.to_shwi ();
2018
2019 /* Try scales of 1 and the element size. */
2020 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
4a669ac3 2021 wi::overflow_type overflow = wi::OVF_NONE;
429ef523
RS
2022 for (int i = 0; i < 2; ++i)
2023 {
2024 int scale = scales[i];
2025 widest_int factor;
2026 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2027 continue;
2028
2029 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2030 in OFFSET_BITS bits. */
4a669ac3
AH
2031 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2032 if (overflow)
429ef523
RS
2033 continue;
2034 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2035 if (wi::min_precision (range, sign) > element_bits)
2036 {
4a669ac3 2037 overflow = wi::OVF_UNKNOWN;
429ef523
RS
2038 continue;
2039 }
2040
2041 /* See whether the target supports the operation. */
2042 tree memory_type = TREE_TYPE (DR_REF (dr));
2043 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2044 memory_type, element_bits, sign, scale,
2045 &gs_info->ifn, &gs_info->element_type))
2046 continue;
2047
2048 tree offset_type = build_nonstandard_integer_type (element_bits,
2049 sign == UNSIGNED);
2050
2051 gs_info->decl = NULL_TREE;
2052 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2053 but we don't need to store that here. */
2054 gs_info->base = NULL_TREE;
2055 gs_info->offset = fold_convert (offset_type, step);
929b4411 2056 gs_info->offset_dt = vect_constant_def;
429ef523
RS
2057 gs_info->offset_vectype = NULL_TREE;
2058 gs_info->scale = scale;
2059 gs_info->memory_type = memory_type;
2060 return true;
2061 }
2062
4a669ac3 2063 if (overflow && dump_enabled_p ())
429ef523
RS
2064 dump_printf_loc (MSG_NOTE, vect_location,
2065 "truncating gather/scatter offset to %d bits"
2066 " might change its value.\n", element_bits);
2067
2068 return false;
2069}
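
/* Worked example (illustrative numbers, not from the original sources):
   for 32-bit elements (ELEMENT_BITS = 32), DR_STEP = 400 bytes and a
   loop whose latch runs at most 99 times (COUNT = 99), trying SCALE = 4
   gives FACTOR = 100 and RANGE = 99 * 100 = 9900, which fits comfortably
   in 32 bits.  The strided access can then be done as a gather/scatter
   with a 32-bit offset vector { 0, 100, 200, ... } and scale 4, provided
   the target supports that combination.  */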
2070
ab2fc782
RS
2071/* Return true if we can use gather/scatter internal functions to
2072 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
2073 MASKED_P is true if the load or store is conditional. When returning
2074 true, fill in GS_INFO with the information required to perform the
2075 operation. */
ab2fc782
RS
2076
2077static bool
2078vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 2079 bool masked_p,
ab2fc782
RS
2080 gather_scatter_info *gs_info)
2081{
2082 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2083 || gs_info->decl)
429ef523
RS
2084 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2085 masked_p, gs_info);
ab2fc782
RS
2086
2087 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2088 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2089 tree offset_type = TREE_TYPE (gs_info->offset);
2090 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2091
2092 /* Enforced by vect_check_gather_scatter. */
2093 gcc_assert (element_bits >= offset_bits);
2094
2095 /* If the elements are wider than the offset, convert the offset to the
2096 same width, without changing its sign. */
2097 if (element_bits > offset_bits)
2098 {
2099 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2100 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2101 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2102 }
2103
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_NOTE, vect_location,
2106 "using gather/scatter for strided/grouped access,"
2107 " scale = %d\n", gs_info->scale);
2108
2109 return true;
2110}
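
/* For example (illustrative only): if vect_check_gather_scatter found a
   32-bit unsigned offset but the accessed elements are 64 bits wide, the
   code above rebuilds the offset as a 64-bit unsigned value so that one
   offset element lines up with one data element in the gather or
   scatter.  */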
2111
62da9e14
RS
2112/* STMT is a non-strided load or store, meaning that it accesses
2113 elements with a known constant step. Return -1 if that step
2114 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2115
2116static int
2117compare_step_with_zero (gimple *stmt)
2118{
2119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
2120 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2121 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2122 size_zero_node);
62da9e14
RS
2123}
2124
2125/* If the target supports a permute mask that reverses the elements in
2126 a vector of type VECTYPE, return that mask, otherwise return null. */
2127
2128static tree
2129perm_mask_for_reverse (tree vectype)
2130{
928686b1 2131 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2132
d980067b
RS
2133 /* The encoding has a single stepped pattern. */
2134 vec_perm_builder sel (nunits, 1, 3);
928686b1 2135 for (int i = 0; i < 3; ++i)
908a1a16 2136 sel.quick_push (nunits - 1 - i);
62da9e14 2137
e3342de4
RS
2138 vec_perm_indices indices (sel, 1, nunits);
2139 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2140 return NULL_TREE;
e3342de4 2141 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2142}
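
/* For example (illustrative, not from the original sources): for a
   4-element vector the mask built above selects { 3, 2, 1, 0 }, so a
   reversal is simply

     reversed_v = VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>;

   Only the three leading elements { nunits-1, nunits-2, nunits-3 } are
   pushed; the stepped encoding lets the vec_perm machinery extend the
   series to any (possibly variable) number of elements.  */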
5ce9450f 2143
c3a8f964
RS
2144/* STMT is either a masked or unconditional store. Return the value
2145 being stored. */
2146
f307441a 2147tree
c3a8f964
RS
2148vect_get_store_rhs (gimple *stmt)
2149{
2150 if (gassign *assign = dyn_cast <gassign *> (stmt))
2151 {
2152 gcc_assert (gimple_assign_single_p (assign));
2153 return gimple_assign_rhs1 (assign);
2154 }
2155 if (gcall *call = dyn_cast <gcall *> (stmt))
2156 {
2157 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2158 int index = internal_fn_stored_value_index (ifn);
2159 gcc_assert (index >= 0);
2160 return gimple_call_arg (stmt, index);
c3a8f964
RS
2161 }
2162 gcc_unreachable ();
2163}
2164
2de001ee
RS
2165/* A subroutine of get_load_store_type, with a subset of the same
2166 arguments. Handle the case where STMT is part of a grouped load
2167 or store.
2168
2169 For stores, the statements in the group are all consecutive
2170 and there is no gap at the end. For loads, the statements in the
2171 group might not be consecutive; there can be gaps between statements
2172 as well as at the end. */
2173
2174static bool
2175get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2176 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2177 vect_memory_access_type *memory_access_type,
2178 gather_scatter_info *gs_info)
2de001ee
RS
2179{
2180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2181 vec_info *vinfo = stmt_info->vinfo;
2182 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2183 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
bffb8014
RS
2184 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2185 data_reference *first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
2186 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2187 bool single_element_p = (stmt_info == first_stmt_info
2c53b149 2188 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
bffb8014 2189 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
928686b1 2190 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2191
2192 /* True if the vectorized statements would access beyond the last
2193 statement in the group. */
2194 bool overrun_p = false;
2195
2196 /* True if we can cope with such overrun by peeling for gaps, so that
2197 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2198 bool can_overrun_p = (!masked_p
2199 && vls_type == VLS_LOAD
2200 && loop_vinfo
2201 && !loop->inner);
2de001ee
RS
2202
2203 /* There can only be a gap at the end of the group if the stride is
2204 known at compile time. */
2205 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2206
2207 /* Stores can't yet have gaps. */
2208 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2209
2210 if (slp)
2211 {
2212 if (STMT_VINFO_STRIDED_P (stmt_info))
2213 {
2c53b149 2214 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2215 separated by the stride, until we have a complete vector.
2216 Fall back to scalar accesses if that isn't possible. */
928686b1 2217 if (multiple_p (nunits, group_size))
2de001ee
RS
2218 *memory_access_type = VMAT_STRIDED_SLP;
2219 else
2220 *memory_access_type = VMAT_ELEMENTWISE;
2221 }
2222 else
2223 {
2224 overrun_p = loop_vinfo && gap != 0;
2225 if (overrun_p && vls_type != VLS_LOAD)
2226 {
2227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2228 "Grouped store with gaps requires"
2229 " non-consecutive accesses\n");
2230 return false;
2231 }
f702e7d4
RS
2232 /* An overrun is fine if the trailing elements are smaller
2233 than the alignment boundary B. Every vector access will
2234 be a multiple of B and so we are guaranteed to access a
2235 non-gap element in the same B-sized block. */
f9ef2c76 2236 if (overrun_p
f702e7d4
RS
2237 && gap < (vect_known_alignment_in_bytes (first_dr)
2238 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2239 overrun_p = false;
2de001ee
RS
2240 if (overrun_p && !can_overrun_p)
2241 {
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 "Peeling for outer loop is not supported\n");
2245 return false;
2246 }
2247 *memory_access_type = VMAT_CONTIGUOUS;
2248 }
2249 }
2250 else
2251 {
2252 /* We can always handle this case using elementwise accesses,
2253 but see if something more efficient is available. */
2254 *memory_access_type = VMAT_ELEMENTWISE;
2255
2256 /* If there is a gap at the end of the group then these optimizations
2257 would access excess elements in the last iteration. */
2258 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2259 /* An overrun is fine if the trailing elements are smaller than the
2260 alignment boundary B. Every vector access will be a multiple of B
2261 and so we are guaranteed to access a non-gap element in the
2262 same B-sized block. */
f9ef2c76 2263 if (would_overrun_p
7e11fc7f 2264 && !masked_p
f702e7d4
RS
2265 && gap < (vect_known_alignment_in_bytes (first_dr)
2266 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2267 would_overrun_p = false;
f702e7d4 2268
2de001ee 2269 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2270 && (can_overrun_p || !would_overrun_p)
2271 && compare_step_with_zero (stmt) > 0)
2de001ee 2272 {
6737facb
RS
2273 /* First cope with the degenerate case of a single-element
2274 vector. */
2275 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2276 *memory_access_type = VMAT_CONTIGUOUS;
2277
2278 /* Otherwise try using LOAD/STORE_LANES. */
2279 if (*memory_access_type == VMAT_ELEMENTWISE
2280 && (vls_type == VLS_LOAD
7e11fc7f
RS
2281 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2282 : vect_store_lanes_supported (vectype, group_size,
2283 masked_p)))
2de001ee
RS
2284 {
2285 *memory_access_type = VMAT_LOAD_STORE_LANES;
2286 overrun_p = would_overrun_p;
2287 }
2288
2289 /* If that fails, try using permuting loads. */
2290 if (*memory_access_type == VMAT_ELEMENTWISE
2291 && (vls_type == VLS_LOAD
2292 ? vect_grouped_load_supported (vectype, single_element_p,
2293 group_size)
2294 : vect_grouped_store_supported (vectype, group_size)))
2295 {
2296 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2297 overrun_p = would_overrun_p;
2298 }
2299 }
429ef523
RS
2300
2301 /* As a last resort, try using a gather load or scatter store.
2302
2303 ??? Although the code can handle all group sizes correctly,
2304 it probably isn't a win to use separate strided accesses based
2305 on nearby locations. Or, even if it's a win over scalar code,
2306 it might not be a win over vectorizing at a lower VF, if that
2307 allows us to use contiguous accesses. */
2308 if (*memory_access_type == VMAT_ELEMENTWISE
2309 && single_element_p
2310 && loop_vinfo
2311 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2312 masked_p, gs_info))
2313 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2314 }
2315
bffb8014 2316 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2de001ee
RS
2317 {
2318 /* STMT is the leader of the group. Check the operands of all the
2319 stmts of the group. */
bffb8014
RS
2320 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2321 while (next_stmt_info)
2de001ee 2322 {
bffb8014 2323 tree op = vect_get_store_rhs (next_stmt_info);
2de001ee 2324 enum vect_def_type dt;
894dd753 2325 if (!vect_is_simple_use (op, vinfo, &dt))
2de001ee
RS
2326 {
2327 if (dump_enabled_p ())
2328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2329 "use not simple.\n");
2330 return false;
2331 }
bffb8014 2332 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2de001ee
RS
2333 }
2334 }
2335
2336 if (overrun_p)
2337 {
2338 gcc_assert (can_overrun_p);
2339 if (dump_enabled_p ())
2340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2341 "Data access with gaps requires scalar "
2342 "epilogue loop\n");
2343 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2344 }
2345
2346 return true;
2347}
2348
62da9e14
RS
2349/* A subroutine of get_load_store_type, with a subset of the same
2350 arguments. Handle the case where STMT is a load or store that
2351 accesses consecutive elements with a negative step. */
2352
2353static vect_memory_access_type
2354get_negative_load_store_type (gimple *stmt, tree vectype,
2355 vec_load_store_type vls_type,
2356 unsigned int ncopies)
2357{
2358 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2359 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2360 dr_alignment_support alignment_support_scheme;
2361
2362 if (ncopies > 1)
2363 {
2364 if (dump_enabled_p ())
2365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2366 "multiple types with negative step.\n");
2367 return VMAT_ELEMENTWISE;
2368 }
2369
2370 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2371 if (alignment_support_scheme != dr_aligned
2372 && alignment_support_scheme != dr_unaligned_supported)
2373 {
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "negative step but alignment required.\n");
2377 return VMAT_ELEMENTWISE;
2378 }
2379
2380 if (vls_type == VLS_STORE_INVARIANT)
2381 {
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_NOTE, vect_location,
2384 "negative step with invariant source;"
2385 " no permute needed.\n");
2386 return VMAT_CONTIGUOUS_DOWN;
2387 }
2388
2389 if (!perm_mask_for_reverse (vectype))
2390 {
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2393 "negative step and reversing not supported.\n");
2394 return VMAT_ELEMENTWISE;
2395 }
2396
2397 return VMAT_CONTIGUOUS_REVERSE;
2398}
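
/* Illustrative sketch (added, not from the original sources) of what
   VMAT_CONTIGUOUS_REVERSE means for a load with a negative step: the
   vectorized code performs an ordinary contiguous vector load covering
   the lowest-addressed elements of the access and then reverses it,
   roughly

     vect__1 = MEM[adjusted_base];                              // load
     vect__2 = VEC_PERM_EXPR <vect__1, vect__1, { n-1, ..., 1, 0 }>;

   which is why perm_mask_for_reverse must succeed for this case.  */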
2399
2de001ee
RS
2400/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2401 if there is a memory access type that the vectorized form can use,
2402 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2403 or scatters, fill in GS_INFO accordingly.
2404
2405 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2406 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2407 VECTYPE is the vector type that the vectorized statements will use.
2408 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2409
2410static bool
7e11fc7f 2411get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2412 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2413 vect_memory_access_type *memory_access_type,
2414 gather_scatter_info *gs_info)
2415{
2416 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2417 vec_info *vinfo = stmt_info->vinfo;
2418 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2419 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2420 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2421 {
2422 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2423 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2424 gcc_unreachable ();
894dd753 2425 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2de001ee
RS
2426 &gs_info->offset_dt,
2427 &gs_info->offset_vectype))
2428 {
2429 if (dump_enabled_p ())
2430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2431 "%s index use not simple.\n",
2432 vls_type == VLS_LOAD ? "gather" : "scatter");
2433 return false;
2434 }
2435 }
2436 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2437 {
7e11fc7f 2438 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2439 memory_access_type, gs_info))
2de001ee
RS
2440 return false;
2441 }
2442 else if (STMT_VINFO_STRIDED_P (stmt_info))
2443 {
2444 gcc_assert (!slp);
ab2fc782 2445 if (loop_vinfo
429ef523
RS
2446 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2447 masked_p, gs_info))
ab2fc782
RS
2448 *memory_access_type = VMAT_GATHER_SCATTER;
2449 else
2450 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2451 }
2452 else
62da9e14
RS
2453 {
2454 int cmp = compare_step_with_zero (stmt);
2455 if (cmp < 0)
2456 *memory_access_type = get_negative_load_store_type
2457 (stmt, vectype, vls_type, ncopies);
2458 else if (cmp == 0)
2459 {
2460 gcc_assert (vls_type == VLS_LOAD);
2461 *memory_access_type = VMAT_INVARIANT;
2462 }
2463 else
2464 *memory_access_type = VMAT_CONTIGUOUS;
2465 }
2de001ee 2466
4d694b27
RS
2467 if ((*memory_access_type == VMAT_ELEMENTWISE
2468 || *memory_access_type == VMAT_STRIDED_SLP)
2469 && !nunits.is_constant ())
2470 {
2471 if (dump_enabled_p ())
2472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2473 "Not using elementwise accesses due to variable "
2474 "vectorization factor.\n");
2475 return false;
2476 }
2477
2de001ee
RS
2478 /* FIXME: At the moment the cost model seems to underestimate the
2479 cost of using elementwise accesses. This check preserves the
2480 traditional behavior until that can be fixed. */
2481 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8 2482 && !STMT_VINFO_STRIDED_P (stmt_info)
bffb8014 2483 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2c53b149
RB
2484 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2485 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2486 {
2487 if (dump_enabled_p ())
2488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2489 "not falling back to elementwise accesses\n");
2490 return false;
2491 }
2492 return true;
2493}
2494
aaeefd88 2495/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2496 conditional load or store STMT. When returning true, store the type
2497 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2498 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2499
2500static bool
929b4411
RS
2501vect_check_load_store_mask (gimple *stmt, tree mask,
2502 vect_def_type *mask_dt_out,
2503 tree *mask_vectype_out)
aaeefd88
RS
2504{
2505 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2506 {
2507 if (dump_enabled_p ())
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2509 "mask argument is not a boolean.\n");
2510 return false;
2511 }
2512
2513 if (TREE_CODE (mask) != SSA_NAME)
2514 {
2515 if (dump_enabled_p ())
2516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2517 "mask argument is not an SSA name.\n");
2518 return false;
2519 }
2520
2521 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2522 enum vect_def_type mask_dt;
aaeefd88 2523 tree mask_vectype;
894dd753 2524 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
aaeefd88
RS
2525 {
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "mask use not simple.\n");
2529 return false;
2530 }
2531
2532 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2533 if (!mask_vectype)
2534 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2535
2536 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2537 {
2538 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 "could not find an appropriate vector mask type.\n");
2541 return false;
2542 }
2543
2544 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2545 TYPE_VECTOR_SUBPARTS (vectype)))
2546 {
2547 if (dump_enabled_p ())
2548 {
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2550 "vector mask type ");
2551 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2552 dump_printf (MSG_MISSED_OPTIMIZATION,
2553 " does not match vector data type ");
2554 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2555 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2556 }
2557 return false;
2558 }
2559
929b4411 2560 *mask_dt_out = mask_dt;
aaeefd88
RS
2561 *mask_vectype_out = mask_vectype;
2562 return true;
2563}
2564
3133c3b6
RS
2565/* Return true if stored value RHS is suitable for vectorizing store
2566 statement STMT. When returning true, store the type of the
929b4411
RS
2567 definition in *RHS_DT_OUT, the type of the vectorized store value in
2568 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2569
2570static bool
929b4411
RS
2571vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2572 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2573{
2574 /* In the case this is a store from a constant make sure
2575 native_encode_expr can handle it. */
2576 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2577 {
2578 if (dump_enabled_p ())
2579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2580 "cannot encode constant as a byte sequence.\n");
2581 return false;
2582 }
2583
2584 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2585 enum vect_def_type rhs_dt;
3133c3b6 2586 tree rhs_vectype;
894dd753 2587 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
3133c3b6
RS
2588 {
2589 if (dump_enabled_p ())
2590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2591 "use not simple.\n");
2592 return false;
2593 }
2594
2595 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2596 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2597 {
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2600 "incompatible vector types.\n");
2601 return false;
2602 }
2603
929b4411 2604 *rhs_dt_out = rhs_dt;
3133c3b6 2605 *rhs_vectype_out = rhs_vectype;
929b4411 2606 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2607 *vls_type_out = VLS_STORE_INVARIANT;
2608 else
2609 *vls_type_out = VLS_STORE;
2610 return true;
2611}
2612
bc9587eb
RS
2613/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2614 Note that we support masks with floating-point type, in which case the
2615 floats are interpreted as a bitmask. */
2616
2617static tree
2618vect_build_all_ones_mask (gimple *stmt, tree masktype)
2619{
2620 if (TREE_CODE (masktype) == INTEGER_TYPE)
2621 return build_int_cst (masktype, -1);
2622 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2623 {
2624 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2625 mask = build_vector_from_val (masktype, mask);
2626 return vect_init_vector (stmt, mask, masktype, NULL);
2627 }
2628 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2629 {
2630 REAL_VALUE_TYPE r;
2631 long tmp[6];
2632 for (int j = 0; j < 6; ++j)
2633 tmp[j] = -1;
2634 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2635 tree mask = build_real (TREE_TYPE (masktype), r);
2636 mask = build_vector_from_val (masktype, mask);
2637 return vect_init_vector (stmt, mask, masktype, NULL);
2638 }
2639 gcc_unreachable ();
2640}
2641
2642/* Build an all-zero merge value of type VECTYPE while vectorizing
2643 STMT as a gather load. */
2644
2645static tree
2646vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2647{
2648 tree merge;
2649 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2650 merge = build_int_cst (TREE_TYPE (vectype), 0);
2651 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2652 {
2653 REAL_VALUE_TYPE r;
2654 long tmp[6];
2655 for (int j = 0; j < 6; ++j)
2656 tmp[j] = 0;
2657 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2658 merge = build_real (TREE_TYPE (vectype), r);
2659 }
2660 else
2661 gcc_unreachable ();
2662 merge = build_vector_from_val (vectype, merge);
2663 return vect_init_vector (stmt, merge, vectype, NULL);
2664}
2665
c48d2d35
RS
2666/* Build a gather load call while vectorizing STMT. Insert new instructions
2667 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2668 operation. If the load is conditional, MASK is the unvectorized
929b4411 2669 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2670
2671static void
2672vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195
RS
2673 stmt_vec_info *vec_stmt,
2674 gather_scatter_info *gs_info, tree mask,
2675 vect_def_type mask_dt)
c48d2d35
RS
2676{
2677 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2678 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2679 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2680 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2681 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2682 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2683 edge pe = loop_preheader_edge (loop);
2684 enum { NARROW, NONE, WIDEN } modifier;
2685 poly_uint64 gather_off_nunits
2686 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2687
2688 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2689 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2690 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2691 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2692 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2693 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2694 tree scaletype = TREE_VALUE (arglist);
2695 gcc_checking_assert (types_compatible_p (srctype, rettype)
2696 && (!mask || types_compatible_p (srctype, masktype)));
2697
2698 tree perm_mask = NULL_TREE;
2699 tree mask_perm_mask = NULL_TREE;
2700 if (known_eq (nunits, gather_off_nunits))
2701 modifier = NONE;
2702 else if (known_eq (nunits * 2, gather_off_nunits))
2703 {
2704 modifier = WIDEN;
2705
2706 /* Currently widening gathers and scatters are only supported for
2707 fixed-length vectors. */
2708 int count = gather_off_nunits.to_constant ();
2709 vec_perm_builder sel (count, count, 1);
2710 for (int i = 0; i < count; ++i)
2711 sel.quick_push (i | (count / 2));
2712
2713 vec_perm_indices indices (sel, 1, count);
2714 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2715 indices);
2716 }
2717 else if (known_eq (nunits, gather_off_nunits * 2))
2718 {
2719 modifier = NARROW;
2720
2721 /* Currently narrowing gathers and scatters are only supported for
2722 fixed-length vectors. */
2723 int count = nunits.to_constant ();
2724 vec_perm_builder sel (count, count, 1);
2725 sel.quick_grow (count);
2726 for (int i = 0; i < count; ++i)
2727 sel[i] = i < count / 2 ? i : i + count / 2;
2728 vec_perm_indices indices (sel, 2, count);
2729 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2730
2731 ncopies *= 2;
2732
2733 if (mask)
2734 {
2735 for (int i = 0; i < count; ++i)
2736 sel[i] = i | (count / 2);
2737 indices.new_vector (sel, 2, count);
2738 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2739 }
2740 }
2741 else
2742 gcc_unreachable ();
2743
2744 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2745 vectype);
2746
2747 tree ptr = fold_convert (ptrtype, gs_info->base);
2748 if (!is_gimple_min_invariant (ptr))
2749 {
2750 gimple_seq seq;
2751 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2752 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2753 gcc_assert (!new_bb);
2754 }
2755
2756 tree scale = build_int_cst (scaletype, gs_info->scale);
2757
2758 tree vec_oprnd0 = NULL_TREE;
2759 tree vec_mask = NULL_TREE;
2760 tree src_op = NULL_TREE;
2761 tree mask_op = NULL_TREE;
2762 tree prev_res = NULL_TREE;
2763 stmt_vec_info prev_stmt_info = NULL;
2764
2765 if (!mask)
2766 {
2767 src_op = vect_build_zero_merge_argument (stmt, rettype);
2768 mask_op = vect_build_all_ones_mask (stmt, masktype);
2769 }
2770
2771 for (int j = 0; j < ncopies; ++j)
2772 {
2773 tree op, var;
c48d2d35
RS
2774 if (modifier == WIDEN && (j & 1))
2775 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2776 perm_mask, stmt, gsi);
2777 else if (j == 0)
2778 op = vec_oprnd0
2779 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2780 else
2781 op = vec_oprnd0
2782 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2783
2784 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2785 {
2786 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2787 TYPE_VECTOR_SUBPARTS (idxtype)));
2788 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2789 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
e1bd7296 2790 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
c48d2d35
RS
2791 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2792 op = var;
2793 }
2794
2795 if (mask)
2796 {
2797 if (mask_perm_mask && (j & 1))
2798 mask_op = permute_vec_elements (mask_op, mask_op,
2799 mask_perm_mask, stmt, gsi);
2800 else
2801 {
2802 if (j == 0)
2803 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2804 else
929b4411 2805 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2806
2807 mask_op = vec_mask;
2808 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2809 {
2810 gcc_assert
2811 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2812 TYPE_VECTOR_SUBPARTS (masktype)));
2813 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2814 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
e1bd7296
RS
2815 gassign *new_stmt
2816 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
c48d2d35
RS
2817 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2818 mask_op = var;
2819 }
2820 }
2821 src_op = mask_op;
2822 }
2823
e1bd7296
RS
2824 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2825 mask_op, scale);
c48d2d35 2826
e1bd7296 2827 stmt_vec_info new_stmt_info;
c48d2d35
RS
2828 if (!useless_type_conversion_p (vectype, rettype))
2829 {
2830 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2831 TYPE_VECTOR_SUBPARTS (rettype)));
2832 op = vect_get_new_ssa_name (rettype, vect_simple_var);
e1bd7296
RS
2833 gimple_call_set_lhs (new_call, op);
2834 vect_finish_stmt_generation (stmt, new_call, gsi);
c48d2d35
RS
2835 var = make_ssa_name (vec_dest);
2836 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
e1bd7296
RS
2837 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2838 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
c48d2d35
RS
2839 }
2840 else
2841 {
e1bd7296
RS
2842 var = make_ssa_name (vec_dest, new_call);
2843 gimple_call_set_lhs (new_call, var);
2844 new_stmt_info = vect_finish_stmt_generation (stmt, new_call, gsi);
c48d2d35
RS
2845 }
2846
c48d2d35
RS
2847 if (modifier == NARROW)
2848 {
2849 if ((j & 1) == 0)
2850 {
2851 prev_res = var;
2852 continue;
2853 }
2854 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
e1bd7296 2855 new_stmt_info = loop_vinfo->lookup_def (var);
c48d2d35
RS
2856 }
2857
dbe1b846 2858 if (prev_stmt_info == NULL_STMT_VEC_INFO)
e1bd7296 2859 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
c48d2d35 2860 else
e1bd7296
RS
2861 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2862 prev_stmt_info = new_stmt_info;
c48d2d35
RS
2863 }
2864}
2865
bfaa08b7
RS
2866/* Prepare the base and offset in GS_INFO for vectorization.
2867 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2868 to the vectorized offset argument for the first copy of STMT. STMT
2869 is the statement described by GS_INFO and LOOP is the containing loop. */
2870
2871static void
2872vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2873 gather_scatter_info *gs_info,
2874 tree *dataref_ptr, tree *vec_offset)
2875{
2876 gimple_seq stmts = NULL;
2877 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2878 if (stmts != NULL)
2879 {
2880 basic_block new_bb;
2881 edge pe = loop_preheader_edge (loop);
2882 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2883 gcc_assert (!new_bb);
2884 }
2885 tree offset_type = TREE_TYPE (gs_info->offset);
2886 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2887 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2888 offset_vectype);
2889}
2890
ab2fc782
RS
2891/* Prepare to implement a grouped or strided load or store using
2892 the gather load or scatter store operation described by GS_INFO.
2893 STMT is the load or store statement.
2894
2895 Set *DATAREF_BUMP to the amount that should be added to the base
2896 address after each copy of the vectorized statement. Set *VEC_OFFSET
2897 to an invariant offset vector in which element I has the value
2898 I * DR_STEP / SCALE. */
2899
2900static void
2901vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2902 gather_scatter_info *gs_info,
2903 tree *dataref_bump, tree *vec_offset)
2904{
2905 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2906 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2907 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2908 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2909 gimple_seq stmts;
2910
2911 tree bump = size_binop (MULT_EXPR,
2912 fold_convert (sizetype, DR_STEP (dr)),
2913 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2914 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2915 if (stmts)
2916 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2917
2918 /* The offset given in GS_INFO can have pointer type, so use the element
2919 type of the vector instead. */
2920 tree offset_type = TREE_TYPE (gs_info->offset);
2921 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2922 offset_type = TREE_TYPE (offset_vectype);
2923
2924 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2925 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2926 ssize_int (gs_info->scale));
2927 step = fold_convert (offset_type, step);
2928 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2929
2930 /* Create {0, X, X*2, X*3, ...}. */
2931 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2932 build_zero_cst (offset_type), step);
2933 if (stmts)
2934 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2935}
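
/* Worked example (illustrative numbers, not from the original sources):
   for V4SI accesses with DR_STEP = 32 bytes and GS_INFO->scale = 8, the
   code above sets *DATAREF_BUMP to 32 * 4 = 128 bytes per copy and,
   since X = 32 / 8 = 4, the invariant offset vector *VEC_OFFSET to
   { 0, 4, 8, 12 }.  */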
2936
2937/* Return the amount that should be added to a vector pointer to move
2938 to the next or previous copy of AGGR_TYPE. DR is the data reference
2939 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2940 vectorization. */
2941
2942static tree
2943vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2944 vect_memory_access_type memory_access_type)
2945{
2946 if (memory_access_type == VMAT_INVARIANT)
2947 return size_zero_node;
2948
2949 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2950 tree step = vect_dr_behavior (dr)->step;
2951 if (tree_int_cst_sgn (step) == -1)
2952 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2953 return iv_step;
2954}
2955
37b14185
RB
2956/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2957
2958static bool
2959vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 2960 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2
RB
2961 tree vectype_in, enum vect_def_type *dt,
2962 stmt_vector_for_cost *cost_vec)
37b14185
RB
2963{
2964 tree op, vectype;
2965 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2967 unsigned ncopies;
2968 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2969
2970 op = gimple_call_arg (stmt, 0);
2971 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2972
2973 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2974 return false;
37b14185
RB
2975
2976 /* Multiple types in SLP are handled by creating the appropriate number of
2977 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2978 case of SLP. */
2979 if (slp_node)
2980 ncopies = 1;
2981 else
e8f142e2 2982 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2983
2984 gcc_assert (ncopies >= 1);
2985
2986 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2987 if (! char_vectype)
2988 return false;
2989
928686b1
RS
2990 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2991 return false;
2992
794e3180 2993 unsigned word_bytes = num_bytes / nunits;
908a1a16 2994
d980067b
RS
2995 /* The encoding uses one stepped pattern for each byte in the word. */
2996 vec_perm_builder elts (num_bytes, word_bytes, 3);
2997 for (unsigned i = 0; i < 3; ++i)
37b14185 2998 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2999 elts.quick_push ((i + 1) * word_bytes - j - 1);
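  /* Illustrative note (added comment): for a 32-bit bswap on a 16-byte
     vector, WORD_BYTES = 4, so the three encoded words above give byte
     indices { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8 }, which the
     stepped encoding extends to the whole vector.  */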
37b14185 3000
e3342de4
RS
3001 vec_perm_indices indices (elts, 1, num_bytes);
3002 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
3003 return false;
3004
3005 if (! vec_stmt)
3006 {
3007 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3008 DUMP_VECT_SCOPE ("vectorizable_bswap");
78604de0 3009 if (! slp_node)
37b14185 3010 {
68435eb2
RB
3011 record_stmt_cost (cost_vec,
3012 1, vector_stmt, stmt_info, 0, vect_prologue);
3013 record_stmt_cost (cost_vec,
3014 ncopies, vec_perm, stmt_info, 0, vect_body);
37b14185
RB
3015 }
3016 return true;
3017 }
3018
736d0f28 3019 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
3020
3021 /* Transform. */
3022 vec<tree> vec_oprnds = vNULL;
e1bd7296 3023 stmt_vec_info new_stmt_info = NULL;
37b14185
RB
3024 stmt_vec_info prev_stmt_info = NULL;
3025 for (unsigned j = 0; j < ncopies; j++)
3026 {
3027 /* Handle uses. */
3028 if (j == 0)
306b0c92 3029 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
3030 else
3031 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3032
3033      /* Arguments are ready.  Create the new vector stmt. */
3034 unsigned i;
3035 tree vop;
3036 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3037 {
e1bd7296 3038 gimple *new_stmt;
37b14185
RB
3039 tree tem = make_ssa_name (char_vectype);
3040 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3041 char_vectype, vop));
3042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3043 tree tem2 = make_ssa_name (char_vectype);
3044 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3045 tem, tem, bswap_vconst);
3046 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3047 tem = make_ssa_name (vectype);
3048 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3049 vectype, tem2));
e1bd7296 3050 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
37b14185 3051 if (slp_node)
e1bd7296 3052 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
37b14185
RB
3053 }
3054
3055 if (slp_node)
3056 continue;
3057
3058 if (j == 0)
e1bd7296 3059 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
37b14185 3060 else
e1bd7296 3061 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
37b14185 3062
e1bd7296 3063 prev_stmt_info = new_stmt_info;
37b14185
RB
3064 }
3065
3066 vec_oprnds.release ();
3067 return true;
3068}
3069
b1b6836e
RS
3070/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3071 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3072 in a single step. On success, store the binary pack code in
3073 *CONVERT_CODE. */
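/* For example, on a target that provides the pack-trunc patterns, V4SI
   inputs can be narrowed to a V8HI output with a single
   VEC_PACK_TRUNC_EXPR, whereas narrowing V2DI to V8HI would need two
   steps and is therefore rejected here.  */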
3074
3075static bool
3076simple_integer_narrowing (tree vectype_out, tree vectype_in,
3077 tree_code *convert_code)
3078{
3079 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3080 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3081 return false;
3082
3083 tree_code code;
3084 int multi_step_cvt = 0;
3085 auto_vec <tree, 8> interm_types;
3086 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3087 &code, &multi_step_cvt,
3088 &interm_types)
3089 || multi_step_cvt)
3090 return false;
3091
3092 *convert_code = code;
3093 return true;
3094}
5ce9450f 3095
ebfd146a
IR
3096/* Function vectorizable_call.
3097
538dd0b7 3098 Check if GS performs a function call that can be vectorized.
b8698a0f 3099    If VEC_STMT is also passed, vectorize GS: create a vectorized
ebfd146a
IR
3100    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3101 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3102
3103static bool
1eede195
RS
3104vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi,
3105 stmt_vec_info *vec_stmt, slp_tree slp_node,
3106 stmt_vector_for_cost *cost_vec)
ebfd146a 3107{
538dd0b7 3108 gcall *stmt;
ebfd146a
IR
3109 tree vec_dest;
3110 tree scalar_dest;
0267732b 3111 tree op;
ebfd146a 3112 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3113 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3114 tree vectype_out, vectype_in;
c7bda0f4
RS
3115 poly_uint64 nunits_in;
3116 poly_uint64 nunits_out;
ebfd146a 3117 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3118 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3119 vec_info *vinfo = stmt_info->vinfo;
81c40241 3120 tree fndecl, new_temp, rhs_type;
2c58d42c
RS
3121 enum vect_def_type dt[4]
3122 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3123 vect_unknown_def_type };
3124 int ndts = ARRAY_SIZE (dt);
ebfd146a 3125 int ncopies, j;
2c58d42c
RS
3126 auto_vec<tree, 8> vargs;
3127 auto_vec<tree, 8> orig_vargs;
ebfd146a
IR
3128 enum { NARROW, NONE, WIDEN } modifier;
3129 size_t i, nargs;
9d5e7640 3130 tree lhs;
ebfd146a 3131
190c2236 3132 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3133 return false;
3134
66c16fd9
RB
3135 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3136 && ! vec_stmt)
ebfd146a
IR
3137 return false;
3138
538dd0b7
DM
3139 /* Is GS a vectorizable call? */
3140 stmt = dyn_cast <gcall *> (gs);
3141 if (!stmt)
ebfd146a
IR
3142 return false;
3143
5ce9450f 3144 if (gimple_call_internal_p (stmt)
bfaa08b7 3145 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3146 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
c3a8f964
RS
3147 /* Handled by vectorizable_load and vectorizable_store. */
3148 return false;
5ce9450f 3149
0136f8f0
AH
3150 if (gimple_call_lhs (stmt) == NULL_TREE
3151 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
3152 return false;
3153
0136f8f0 3154 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3155
b690cc0f
RG
3156 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3157
ebfd146a
IR
3158 /* Process function arguments. */
3159 rhs_type = NULL_TREE;
b690cc0f 3160 vectype_in = NULL_TREE;
ebfd146a
IR
3161 nargs = gimple_call_num_args (stmt);
3162
1b1562a5
MM
3163   /* Bail out if the function has more than four arguments; we do not have
3164      interesting builtin functions to vectorize with more than two arguments
3165      except for fma (plus a possible mask operand).  No arguments is also not good. */
2c58d42c 3166 if (nargs == 0 || nargs > 4)
ebfd146a
IR
3167 return false;
3168
74bf76ed 3169 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2c58d42c
RS
3170 combined_fn cfn = gimple_call_combined_fn (stmt);
3171 if (cfn == CFN_GOMP_SIMD_LANE)
74bf76ed
JJ
3172 {
3173 nargs = 0;
3174 rhs_type = unsigned_type_node;
3175 }
3176
2c58d42c
RS
3177 int mask_opno = -1;
3178 if (internal_fn_p (cfn))
3179 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3180
ebfd146a
IR
3181 for (i = 0; i < nargs; i++)
3182 {
b690cc0f
RG
3183 tree opvectype;
3184
ebfd146a 3185 op = gimple_call_arg (stmt, i);
2c58d42c
RS
3186 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3187 {
3188 if (dump_enabled_p ())
3189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3190 "use not simple.\n");
3191 return false;
3192 }
3193
3194 /* Skip the mask argument to an internal function. This operand
3195 has been converted via a pattern if necessary. */
3196 if ((int) i == mask_opno)
3197 continue;
ebfd146a
IR
3198
3199 /* We can only handle calls with arguments of the same type. */
3200 if (rhs_type
8533c9d8 3201 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3202 {
73fbfcad 3203 if (dump_enabled_p ())
78c60e3d 3204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3205 "argument types differ.\n");
ebfd146a
IR
3206 return false;
3207 }
b690cc0f
RG
3208 if (!rhs_type)
3209 rhs_type = TREE_TYPE (op);
ebfd146a 3210
b690cc0f
RG
3211 if (!vectype_in)
3212 vectype_in = opvectype;
3213 else if (opvectype
3214 && opvectype != vectype_in)
3215 {
73fbfcad 3216 if (dump_enabled_p ())
78c60e3d 3217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3218 "argument vector types differ.\n");
b690cc0f
RG
3219 return false;
3220 }
3221 }
3222 /* If all arguments are external or constant defs use a vector type with
3223 the same size as the output vector type. */
ebfd146a 3224 if (!vectype_in)
b690cc0f 3225 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3226 if (vec_stmt)
3227 gcc_assert (vectype_in);
3228 if (!vectype_in)
3229 {
73fbfcad 3230 if (dump_enabled_p ())
7d8930a0 3231 {
78c60e3d
SS
3232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3233 "no vectype for scalar type ");
3234 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3235 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3236 }
3237
3238 return false;
3239 }
ebfd146a
IR
3240
3241 /* FORNOW */
b690cc0f
RG
3242 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3243 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3244 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3245 modifier = NARROW;
c7bda0f4 3246 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3247 modifier = NONE;
c7bda0f4 3248 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3249 modifier = WIDEN;
3250 else
3251 return false;
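  /* For example (assuming the target can vectorize such a call at all):
     with V4SI arguments, a V8HI result has twice as many, narrower,
     elements and is classified as NARROW, while a V2DI result would be
     classified as WIDEN.  */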
3252
70439f0d
RS
3253 /* We only handle functions that do not read or clobber memory. */
3254 if (gimple_vuse (stmt))
3255 {
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3258 "function reads from or writes to memory.\n");
3259 return false;
3260 }
3261
ebfd146a
IR
3262 /* For now, we only vectorize functions if a target specific builtin
3263 is available. TODO -- in some cases, it might be profitable to
3264 insert the calls for pieces of the vector, in order to be able
3265 to vectorize other operations in the loop. */
70439f0d
RS
3266 fndecl = NULL_TREE;
3267 internal_fn ifn = IFN_LAST;
70439f0d
RS
3268 tree callee = gimple_call_fndecl (stmt);
3269
3270 /* First try using an internal function. */
b1b6836e
RS
3271 tree_code convert_code = ERROR_MARK;
3272 if (cfn != CFN_LAST
3273 && (modifier == NONE
3274 || (modifier == NARROW
3275 && simple_integer_narrowing (vectype_out, vectype_in,
3276 &convert_code))))
70439f0d
RS
3277 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3278 vectype_in);
3279
3280 /* If that fails, try asking for a target-specific built-in function. */
3281 if (ifn == IFN_LAST)
3282 {
3283 if (cfn != CFN_LAST)
3284 fndecl = targetm.vectorize.builtin_vectorized_function
3285 (cfn, vectype_out, vectype_in);
7672aa9b 3286 else if (callee)
70439f0d
RS
3287 fndecl = targetm.vectorize.builtin_md_vectorized_function
3288 (callee, vectype_out, vectype_in);
3289 }
3290
3291 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3292 {
70439f0d 3293 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3294 && !slp_node
3295 && loop_vinfo
3296 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3297 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3298 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3299 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3300 {
3301 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3302 { 0, 1, 2, ... vf - 1 } vector. */
3303 gcc_assert (nargs == 0);
3304 }
37b14185
RB
3305 else if (modifier == NONE
3306 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3307 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3308 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3309 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
68435eb2 3310 vectype_in, dt, cost_vec);
74bf76ed
JJ
3311 else
3312 {
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3315 "function is not vectorizable.\n");
74bf76ed
JJ
3316 return false;
3317 }
ebfd146a
IR
3318 }
3319
fce57248 3320 if (slp_node)
190c2236 3321 ncopies = 1;
b1b6836e 3322 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3323 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3324 else
e8f142e2 3325 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3326
3327 /* Sanity check: make sure that at least one copy of the vectorized stmt
3328 needs to be generated. */
3329 gcc_assert (ncopies >= 1);
3330
ed623edb 3331 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
ebfd146a
IR
3332 if (!vec_stmt) /* transformation not required. */
3333 {
3334 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3335 DUMP_VECT_SCOPE ("vectorizable_call");
68435eb2
RB
3336 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3337 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3338 record_stmt_cost (cost_vec, ncopies / 2,
3339 vec_promote_demote, stmt_info, 0, vect_body);
b1b6836e 3340
2c58d42c
RS
3341 if (loop_vinfo && mask_opno >= 0)
3342 {
3343 unsigned int nvectors = (slp_node
3344 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3345 : ncopies);
3346 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3347 }
ebfd146a
IR
3348 return true;
3349 }
3350
67b8dbac 3351 /* Transform. */
ebfd146a 3352
73fbfcad 3353 if (dump_enabled_p ())
e645e942 3354 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3355
3356 /* Handle def. */
3357 scalar_dest = gimple_call_lhs (stmt);
3358 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3359
2c58d42c
RS
3360 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3361
e1bd7296 3362 stmt_vec_info new_stmt_info = NULL;
ebfd146a 3363 prev_stmt_info = NULL;
b1b6836e 3364 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3365 {
b1b6836e 3366 tree prev_res = NULL_TREE;
2c58d42c
RS
3367 vargs.safe_grow (nargs);
3368 orig_vargs.safe_grow (nargs);
ebfd146a
IR
3369 for (j = 0; j < ncopies; ++j)
3370 {
3371 /* Build argument list for the vectorized call. */
190c2236
JJ
3372 if (slp_node)
3373 {
ef062b13 3374 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3375 vec<tree> vec_oprnds0;
190c2236
JJ
3376
3377 for (i = 0; i < nargs; i++)
2c58d42c 3378 vargs[i] = gimple_call_arg (stmt, i);
306b0c92 3379 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3380 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3381
3382 /* Arguments are ready. Create the new vector stmt. */
9771b263 3383 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3384 {
3385 size_t k;
3386 for (k = 0; k < nargs; k++)
3387 {
37b5ec8f 3388 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3389 vargs[k] = vec_oprndsk[i];
190c2236 3390 }
b1b6836e
RS
3391 if (modifier == NARROW)
3392 {
2c58d42c
RS
3393 /* We don't define any narrowing conditional functions
3394 at present. */
3395 gcc_assert (mask_opno < 0);
b1b6836e 3396 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3397 gcall *call
3398 = gimple_build_call_internal_vec (ifn, vargs);
3399 gimple_call_set_lhs (call, half_res);
3400 gimple_call_set_nothrow (call, true);
e1bd7296
RS
3401 new_stmt_info
3402 = vect_finish_stmt_generation (stmt, call, gsi);
b1b6836e
RS
3403 if ((i & 1) == 0)
3404 {
3405 prev_res = half_res;
3406 continue;
3407 }
3408 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
3409 gimple *new_stmt
3410 = gimple_build_assign (new_temp, convert_code,
3411 prev_res, half_res);
3412 new_stmt_info
3413 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
b1b6836e 3414 }
70439f0d 3415 else
b1b6836e 3416 {
2c58d42c
RS
3417 if (mask_opno >= 0 && masked_loop_p)
3418 {
3419 unsigned int vec_num = vec_oprnds0.length ();
3420 /* Always true for SLP. */
3421 gcc_assert (ncopies == 1);
3422 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3423 vectype_out, i);
3424 vargs[mask_opno] = prepare_load_store_mask
3425 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3426 }
3427
a844293d 3428 gcall *call;
b1b6836e 3429 if (ifn != IFN_LAST)
a844293d 3430 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3431 else
a844293d
RS
3432 call = gimple_build_call_vec (fndecl, vargs);
3433 new_temp = make_ssa_name (vec_dest, call);
3434 gimple_call_set_lhs (call, new_temp);
3435 gimple_call_set_nothrow (call, true);
e1bd7296
RS
3436 new_stmt_info
3437 = vect_finish_stmt_generation (stmt, call, gsi);
b1b6836e 3438 }
e1bd7296 3439 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
190c2236
JJ
3440 }
3441
3442 for (i = 0; i < nargs; i++)
3443 {
37b5ec8f 3444 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3445 vec_oprndsi.release ();
190c2236 3446 }
190c2236
JJ
3447 continue;
3448 }
3449
ebfd146a
IR
3450 for (i = 0; i < nargs; i++)
3451 {
3452 op = gimple_call_arg (stmt, i);
3453 if (j == 0)
3454 vec_oprnd0
81c40241 3455 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3456 else
2c58d42c
RS
3457 vec_oprnd0
3458 = vect_get_vec_def_for_stmt_copy (dt[i], orig_vargs[i]);
3459
3460 orig_vargs[i] = vargs[i] = vec_oprnd0;
3461 }
ebfd146a 3462
2c58d42c
RS
3463 if (mask_opno >= 0 && masked_loop_p)
3464 {
3465 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3466 vectype_out, j);
3467 vargs[mask_opno]
3468 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3469 vargs[mask_opno], gsi);
ebfd146a
IR
3470 }
3471
2c58d42c 3472 if (cfn == CFN_GOMP_SIMD_LANE)
74bf76ed 3473 {
c7bda0f4 3474 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3475 tree new_var
0e22bb5a 3476 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3477 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3478 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3479 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
3480 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3481 new_stmt_info
3482 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
74bf76ed 3483 }
b1b6836e
RS
3484 else if (modifier == NARROW)
3485 {
2c58d42c
RS
3486 /* We don't define any narrowing conditional functions at
3487 present. */
3488 gcc_assert (mask_opno < 0);
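	      /* Each vectorized call yields a VECTYPE_IN vector holding half
		 of the final elements; the results of two consecutive copies
		 are packed together with CONVERT_CODE below, so only every
		 second iteration of the J loop emits the final statement.  */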
b1b6836e 3489 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3490 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3491 gimple_call_set_lhs (call, half_res);
3492 gimple_call_set_nothrow (call, true);
e1bd7296 3493 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
b1b6836e
RS
3494 if ((j & 1) == 0)
3495 {
3496 prev_res = half_res;
3497 continue;
3498 }
3499 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
3500 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3501 prev_res, half_res);
3502 new_stmt_info
3503 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
b1b6836e 3504 }
74bf76ed
JJ
3505 else
3506 {
a844293d 3507 gcall *call;
70439f0d 3508 if (ifn != IFN_LAST)
a844293d 3509 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3510 else
a844293d 3511 call = gimple_build_call_vec (fndecl, vargs);
e1bd7296 3512 new_temp = make_ssa_name (vec_dest, call);
a844293d
RS
3513 gimple_call_set_lhs (call, new_temp);
3514 gimple_call_set_nothrow (call, true);
e1bd7296 3515 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
74bf76ed 3516 }
ebfd146a 3517
b1b6836e 3518 if (j == (modifier == NARROW ? 1 : 0))
e1bd7296 3519 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
ebfd146a 3520 else
e1bd7296 3521 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
ebfd146a 3522
e1bd7296 3523 prev_stmt_info = new_stmt_info;
ebfd146a 3524 }
b1b6836e
RS
3525 }
3526 else if (modifier == NARROW)
3527 {
2c58d42c
RS
3528 /* We don't define any narrowing conditional functions at present. */
3529 gcc_assert (mask_opno < 0);
ebfd146a
IR
3530 for (j = 0; j < ncopies; ++j)
3531 {
3532 /* Build argument list for the vectorized call. */
3533 if (j == 0)
9771b263 3534 vargs.create (nargs * 2);
ebfd146a 3535 else
9771b263 3536 vargs.truncate (0);
ebfd146a 3537
190c2236
JJ
3538 if (slp_node)
3539 {
ef062b13 3540 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3541 vec<tree> vec_oprnds0;
190c2236
JJ
3542
3543 for (i = 0; i < nargs; i++)
9771b263 3544 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3545 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3546 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3547
3548 /* Arguments are ready. Create the new vector stmt. */
9771b263 3549 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3550 {
3551 size_t k;
9771b263 3552 vargs.truncate (0);
190c2236
JJ
3553 for (k = 0; k < nargs; k++)
3554 {
37b5ec8f 3555 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3556 vargs.quick_push (vec_oprndsk[i]);
3557 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3558 }
a844293d 3559 gcall *call;
70439f0d 3560 if (ifn != IFN_LAST)
a844293d 3561 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3562 else
a844293d
RS
3563 call = gimple_build_call_vec (fndecl, vargs);
3564 new_temp = make_ssa_name (vec_dest, call);
3565 gimple_call_set_lhs (call, new_temp);
3566 gimple_call_set_nothrow (call, true);
e1bd7296
RS
3567 new_stmt_info
3568 = vect_finish_stmt_generation (stmt, call, gsi);
3569 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
190c2236
JJ
3570 }
3571
3572 for (i = 0; i < nargs; i++)
3573 {
37b5ec8f 3574 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3575 vec_oprndsi.release ();
190c2236 3576 }
190c2236
JJ
3577 continue;
3578 }
3579
ebfd146a
IR
3580 for (i = 0; i < nargs; i++)
3581 {
3582 op = gimple_call_arg (stmt, i);
3583 if (j == 0)
3584 {
3585 vec_oprnd0
81c40241 3586 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3587 vec_oprnd1
63827fb8 3588 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3589 }
3590 else
3591 {
e1bd7296
RS
3592 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3593 2 * i + 1);
ebfd146a 3594 vec_oprnd0
63827fb8 3595 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3596 vec_oprnd1
63827fb8 3597 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3598 }
3599
9771b263
DN
3600 vargs.quick_push (vec_oprnd0);
3601 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3602 }
3603
e1bd7296 3604 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3605 new_temp = make_ssa_name (vec_dest, new_stmt);
3606 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296 3607 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
3608
3609 if (j == 0)
e1bd7296 3610 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
ebfd146a 3611 else
e1bd7296 3612 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
ebfd146a 3613
e1bd7296 3614 prev_stmt_info = new_stmt_info;
ebfd146a
IR
3615 }
3616
3617 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3618 }
b1b6836e
RS
3619 else
3620 /* No current target implements this case. */
3621 return false;
ebfd146a 3622
9771b263 3623 vargs.release ();
ebfd146a 3624
ebfd146a
IR
3625 /* The call in STMT might prevent it from being removed in dce.
3626 We however cannot remove it here, due to the way the ssa name
3627 it defines is mapped to the new definition. So just replace
3628 rhs of the statement with something harmless. */
3629
dd34c087
JJ
3630 if (slp_node)
3631 return true;
3632
9d5e7640 3633 if (is_pattern_stmt_p (stmt_info))
10681ce8 3634 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
ed7b8123 3635 lhs = gimple_get_lhs (stmt_info->stmt);
3cc2fa2a 3636
e1bd7296
RS
3637 gassign *new_stmt
3638 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
ebfd146a 3639 set_vinfo_for_stmt (new_stmt, stmt_info);
ed7b8123 3640 set_vinfo_for_stmt (stmt_info->stmt, NULL);
ebfd146a
IR
3641 STMT_VINFO_STMT (stmt_info) = new_stmt;
3642 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3643
3644 return true;
3645}
3646
3647
0136f8f0
AH
3648struct simd_call_arg_info
3649{
3650 tree vectype;
3651 tree op;
0136f8f0 3652 HOST_WIDE_INT linear_step;
34e82342 3653 enum vect_def_type dt;
0136f8f0 3654 unsigned int align;
17b658af 3655 bool simd_lane_linear;
0136f8f0
AH
3656};
3657
17b658af
JJ
3658/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3659 is linear within simd lane (but not within whole loop), note it in
3660 *ARGINFO. */
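/* For example, an address of the form BASE p+ (CST * GOMP_SIMD_LANE ()),
   possibly with intermediate conversions and with constant offsets folded
   into BASE, is recorded as ARGINFO->op = BASE, ARGINFO->linear_step = CST
   and ARGINFO->simd_lane_linear = true.  */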
3661
3662static void
3663vect_simd_lane_linear (tree op, struct loop *loop,
3664 struct simd_call_arg_info *arginfo)
3665{
355fe088 3666 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3667
3668 if (!is_gimple_assign (def_stmt)
3669 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3670 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3671 return;
3672
3673 tree base = gimple_assign_rhs1 (def_stmt);
3674 HOST_WIDE_INT linear_step = 0;
3675 tree v = gimple_assign_rhs2 (def_stmt);
3676 while (TREE_CODE (v) == SSA_NAME)
3677 {
3678 tree t;
3679 def_stmt = SSA_NAME_DEF_STMT (v);
3680 if (is_gimple_assign (def_stmt))
3681 switch (gimple_assign_rhs_code (def_stmt))
3682 {
3683 case PLUS_EXPR:
3684 t = gimple_assign_rhs2 (def_stmt);
3685 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3686 return;
3687 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3688 v = gimple_assign_rhs1 (def_stmt);
3689 continue;
3690 case MULT_EXPR:
3691 t = gimple_assign_rhs2 (def_stmt);
3692 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3693 return;
3694 linear_step = tree_to_shwi (t);
3695 v = gimple_assign_rhs1 (def_stmt);
3696 continue;
3697 CASE_CONVERT:
3698 t = gimple_assign_rhs1 (def_stmt);
3699 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3700 || (TYPE_PRECISION (TREE_TYPE (v))
3701 < TYPE_PRECISION (TREE_TYPE (t))))
3702 return;
3703 if (!linear_step)
3704 linear_step = 1;
3705 v = t;
3706 continue;
3707 default:
3708 return;
3709 }
8e4284d0 3710 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3711 && loop->simduid
3712 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3713 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3714 == loop->simduid))
3715 {
3716 if (!linear_step)
3717 linear_step = 1;
3718 arginfo->linear_step = linear_step;
3719 arginfo->op = base;
3720 arginfo->simd_lane_linear = true;
3721 return;
3722 }
3723 }
3724}
3725
cf1b2ba4
RS
3726/* Return the number of elements in vector type VECTYPE, which is associated
3727 with a SIMD clone. At present these vectors always have a constant
3728 length. */
3729
3730static unsigned HOST_WIDE_INT
3731simd_clone_subparts (tree vectype)
3732{
928686b1 3733 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3734}
3735
0136f8f0
AH
3736/* Function vectorizable_simd_clone_call.
3737
3738 Check if STMT performs a function call that can be vectorized
3739 by calling a simd clone of the function.
3740 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3741 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3742 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3743
3744static bool
355fe088 3745vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 3746 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 3747 stmt_vector_for_cost *)
0136f8f0
AH
3748{
3749 tree vec_dest;
3750 tree scalar_dest;
3751 tree op, type;
3752 tree vec_oprnd0 = NULL_TREE;
3753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3754 tree vectype;
3755 unsigned int nunits;
3756 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3757 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3758 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3759 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3760 tree fndecl, new_temp;
0136f8f0 3761 int ncopies, j;
00426f9a 3762 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3763 vec<tree> vargs = vNULL;
3764 size_t i, nargs;
3765 tree lhs, rtype, ratype;
e7a74006 3766 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3767
3768 /* Is STMT a vectorizable call? */
3769 if (!is_gimple_call (stmt))
3770 return false;
3771
3772 fndecl = gimple_call_fndecl (stmt);
3773 if (fndecl == NULL_TREE)
3774 return false;
3775
d52f5295 3776 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3777 if (node == NULL || node->simd_clones == NULL)
3778 return false;
3779
3780 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3781 return false;
3782
66c16fd9
RB
3783 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3784 && ! vec_stmt)
0136f8f0
AH
3785 return false;
3786
3787 if (gimple_call_lhs (stmt)
3788 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3789 return false;
3790
3791 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3792
3793 vectype = STMT_VINFO_VECTYPE (stmt_info);
3794
3795 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3796 return false;
3797
3798 /* FORNOW */
fce57248 3799 if (slp_node)
0136f8f0
AH
3800 return false;
3801
3802 /* Process function arguments. */
3803 nargs = gimple_call_num_args (stmt);
3804
3805 /* Bail out if the function has zero arguments. */
3806 if (nargs == 0)
3807 return false;
3808
00426f9a 3809 arginfo.reserve (nargs, true);
0136f8f0
AH
3810
3811 for (i = 0; i < nargs; i++)
3812 {
3813 simd_call_arg_info thisarginfo;
3814 affine_iv iv;
3815
3816 thisarginfo.linear_step = 0;
3817 thisarginfo.align = 0;
3818 thisarginfo.op = NULL_TREE;
17b658af 3819 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3820
3821 op = gimple_call_arg (stmt, i);
894dd753 3822 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
81c40241 3823 &thisarginfo.vectype)
0136f8f0
AH
3824 || thisarginfo.dt == vect_uninitialized_def)
3825 {
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3828 "use not simple.\n");
0136f8f0
AH
3829 return false;
3830 }
3831
3832 if (thisarginfo.dt == vect_constant_def
3833 || thisarginfo.dt == vect_external_def)
3834 gcc_assert (thisarginfo.vectype == NULL_TREE);
3835 else
3836 gcc_assert (thisarginfo.vectype != NULL_TREE);
3837
6c9e85fb
JJ
3838 /* For linear arguments, the analyze phase should have saved
3839 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
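      /* That vector is laid out as: element 0 is the decl of the selected
	 clone; for argument I, elements I*3 + 1, I*3 + 2 and I*3 + 3 hold
	 the recorded base, the linear step and a flag saying whether the
	 argument is linear only within the simd lane.  */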
17b658af
JJ
3840 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3841 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3842 {
3843 gcc_assert (vec_stmt);
3844 thisarginfo.linear_step
17b658af 3845 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3846 thisarginfo.op
17b658af
JJ
3847 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3848 thisarginfo.simd_lane_linear
3849 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3850 == boolean_true_node);
6c9e85fb
JJ
3851 /* If loop has been peeled for alignment, we need to adjust it. */
3852 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3853 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3854 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3855 {
3856 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3857 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3858 tree opt = TREE_TYPE (thisarginfo.op);
3859 bias = fold_convert (TREE_TYPE (step), bias);
3860 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3861 thisarginfo.op
3862 = fold_build2 (POINTER_TYPE_P (opt)
3863 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3864 thisarginfo.op, bias);
3865 }
3866 }
3867 else if (!vec_stmt
3868 && thisarginfo.dt != vect_constant_def
3869 && thisarginfo.dt != vect_external_def
3870 && loop_vinfo
3871 && TREE_CODE (op) == SSA_NAME
3872 && simple_iv (loop, loop_containing_stmt (stmt), op,
3873 &iv, false)
3874 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3875 {
3876 thisarginfo.linear_step = tree_to_shwi (iv.step);
3877 thisarginfo.op = iv.base;
3878 }
3879 else if ((thisarginfo.dt == vect_constant_def
3880 || thisarginfo.dt == vect_external_def)
3881 && POINTER_TYPE_P (TREE_TYPE (op)))
3882 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3883 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3884 linear too. */
3885 if (POINTER_TYPE_P (TREE_TYPE (op))
3886 && !thisarginfo.linear_step
3887 && !vec_stmt
3888 && thisarginfo.dt != vect_constant_def
3889 && thisarginfo.dt != vect_external_def
3890 && loop_vinfo
3891 && !slp_node
3892 && TREE_CODE (op) == SSA_NAME)
3893 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3894
3895 arginfo.quick_push (thisarginfo);
3896 }
3897
d9f21f6a
RS
3898 unsigned HOST_WIDE_INT vf;
3899 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3900 {
3901 if (dump_enabled_p ())
3902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3903 "not considering SIMD clones; not yet supported"
3904 " for variable-width vectors.\n");
3905       return false;
3906 }
3907
0136f8f0
AH
3908 unsigned int badness = 0;
3909 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3910 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3911 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3912 else
3913 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3914 n = n->simdclone->next_clone)
3915 {
3916 unsigned int this_badness = 0;
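	/* Smaller badness is better.  A clone whose simdlen is below the
	   vectorization factor is penalized 1024 per halving (several calls
	   are then needed per vector iteration), an inbranch clone costs
	   2048, target reluctance counts 512 per unit, and each vector
	   parameter whose actual argument is uniform or linear adds 64.  */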
d9f21f6a 3917 if (n->simdclone->simdlen > vf
0136f8f0
AH
3918 || n->simdclone->nargs != nargs)
3919 continue;
d9f21f6a
RS
3920 if (n->simdclone->simdlen < vf)
3921 this_badness += (exact_log2 (vf)
0136f8f0
AH
3922 - exact_log2 (n->simdclone->simdlen)) * 1024;
3923 if (n->simdclone->inbranch)
3924 this_badness += 2048;
3925 int target_badness = targetm.simd_clone.usable (n);
3926 if (target_badness < 0)
3927 continue;
3928 this_badness += target_badness * 512;
3929 /* FORNOW: Have to add code to add the mask argument. */
3930 if (n->simdclone->inbranch)
3931 continue;
3932 for (i = 0; i < nargs; i++)
3933 {
3934 switch (n->simdclone->args[i].arg_type)
3935 {
3936 case SIMD_CLONE_ARG_TYPE_VECTOR:
3937 if (!useless_type_conversion_p
3938 (n->simdclone->args[i].orig_type,
3939 TREE_TYPE (gimple_call_arg (stmt, i))))
3940 i = -1;
3941 else if (arginfo[i].dt == vect_constant_def
3942 || arginfo[i].dt == vect_external_def
3943 || arginfo[i].linear_step)
3944 this_badness += 64;
3945 break;
3946 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3947 if (arginfo[i].dt != vect_constant_def
3948 && arginfo[i].dt != vect_external_def)
3949 i = -1;
3950 break;
3951 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3952 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3953 if (arginfo[i].dt == vect_constant_def
3954 || arginfo[i].dt == vect_external_def
3955 || (arginfo[i].linear_step
3956 != n->simdclone->args[i].linear_step))
3957 i = -1;
3958 break;
3959 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3960 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3961 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3962 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3963 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3964 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3965 /* FORNOW */
3966 i = -1;
3967 break;
3968 case SIMD_CLONE_ARG_TYPE_MASK:
3969 gcc_unreachable ();
3970 }
3971 if (i == (size_t) -1)
3972 break;
3973 if (n->simdclone->args[i].alignment > arginfo[i].align)
3974 {
3975 i = -1;
3976 break;
3977 }
3978 if (arginfo[i].align)
3979 this_badness += (exact_log2 (arginfo[i].align)
3980 - exact_log2 (n->simdclone->args[i].alignment));
3981 }
3982 if (i == (size_t) -1)
3983 continue;
3984 if (bestn == NULL || this_badness < badness)
3985 {
3986 bestn = n;
3987 badness = this_badness;
3988 }
3989 }
3990
3991 if (bestn == NULL)
00426f9a 3992 return false;
0136f8f0
AH
3993
3994 for (i = 0; i < nargs; i++)
3995 if ((arginfo[i].dt == vect_constant_def
3996 || arginfo[i].dt == vect_external_def)
3997 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3998 {
3999 arginfo[i].vectype
4000 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4001 i)));
4002 if (arginfo[i].vectype == NULL
cf1b2ba4 4003 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 4004 > bestn->simdclone->simdlen))
00426f9a 4005 return false;
0136f8f0
AH
4006 }
4007
4008 fndecl = bestn->decl;
4009 nunits = bestn->simdclone->simdlen;
d9f21f6a 4010 ncopies = vf / nunits;
0136f8f0
AH
4011
4012   /* If the function isn't const, only allow it in simd loops where the user
4013 has asserted that at least nunits consecutive iterations can be
4014 performed using SIMD instructions. */
4015 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4016 && gimple_vuse (stmt))
00426f9a 4017 return false;
0136f8f0
AH
4018
4019 /* Sanity check: make sure that at least one copy of the vectorized stmt
4020 needs to be generated. */
4021 gcc_assert (ncopies >= 1);
4022
4023 if (!vec_stmt) /* transformation not required. */
4024 {
6c9e85fb
JJ
4025 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4026 for (i = 0; i < nargs; i++)
7adb26f2
JJ
4027 if ((bestn->simdclone->args[i].arg_type
4028 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4029 || (bestn->simdclone->args[i].arg_type
4030 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 4031 {
17b658af 4032 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
4033 + 1);
4034 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4035 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4036 ? size_type_node : TREE_TYPE (arginfo[i].op);
4037 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
4039 tree sll = arginfo[i].simd_lane_linear
4040 ? boolean_true_node : boolean_false_node;
4041 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 4042 }
0136f8f0 4043 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
adac3a68 4044 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
68435eb2 4045/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
0136f8f0
AH
4046 return true;
4047 }
4048
67b8dbac 4049 /* Transform. */
0136f8f0
AH
4050
4051 if (dump_enabled_p ())
4052 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4053
4054 /* Handle def. */
4055 scalar_dest = gimple_call_lhs (stmt);
4056 vec_dest = NULL_TREE;
4057 rtype = NULL_TREE;
4058 ratype = NULL_TREE;
4059 if (scalar_dest)
4060 {
4061 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4062 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4063 if (TREE_CODE (rtype) == ARRAY_TYPE)
4064 {
4065 ratype = rtype;
4066 rtype = TREE_TYPE (ratype);
4067 }
4068 }
4069
4070 prev_stmt_info = NULL;
4071 for (j = 0; j < ncopies; ++j)
4072 {
4073 /* Build argument list for the vectorized call. */
4074 if (j == 0)
4075 vargs.create (nargs);
4076 else
4077 vargs.truncate (0);
4078
4079 for (i = 0; i < nargs; i++)
4080 {
4081 unsigned int k, l, m, o;
4082 tree atype;
4083 op = gimple_call_arg (stmt, i);
4084 switch (bestn->simdclone->args[i].arg_type)
4085 {
4086 case SIMD_CLONE_ARG_TYPE_VECTOR:
4087 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 4088 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
4089 for (m = j * o; m < (j + 1) * o; m++)
4090 {
cf1b2ba4
RS
4091 if (simd_clone_subparts (atype)
4092 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 4093 {
73a699ae 4094 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
4095 k = (simd_clone_subparts (arginfo[i].vectype)
4096 / simd_clone_subparts (atype));
0136f8f0
AH
4097 gcc_assert ((k & (k - 1)) == 0);
4098 if (m == 0)
4099 vec_oprnd0
81c40241 4100 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4101 else
4102 {
4103 vec_oprnd0 = arginfo[i].op;
4104 if ((m & (k - 1)) == 0)
4105 vec_oprnd0
4106 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4107 vec_oprnd0);
4108 }
4109 arginfo[i].op = vec_oprnd0;
4110 vec_oprnd0
4111 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 4112 bitsize_int (prec),
0136f8f0 4113 bitsize_int ((m & (k - 1)) * prec));
e1bd7296 4114 gassign *new_stmt
b731b390 4115 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4116 vec_oprnd0);
4117 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4118 vargs.safe_push (gimple_assign_lhs (new_stmt));
4119 }
4120 else
4121 {
cf1b2ba4
RS
4122 k = (simd_clone_subparts (atype)
4123 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
4124 gcc_assert ((k & (k - 1)) == 0);
4125 vec<constructor_elt, va_gc> *ctor_elts;
4126 if (k != 1)
4127 vec_alloc (ctor_elts, k);
4128 else
4129 ctor_elts = NULL;
4130 for (l = 0; l < k; l++)
4131 {
4132 if (m == 0 && l == 0)
4133 vec_oprnd0
81c40241 4134 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4135 else
4136 vec_oprnd0
4137 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4138 arginfo[i].op);
4139 arginfo[i].op = vec_oprnd0;
4140 if (k == 1)
4141 break;
4142 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4143 vec_oprnd0);
4144 }
4145 if (k == 1)
4146 vargs.safe_push (vec_oprnd0);
4147 else
4148 {
4149 vec_oprnd0 = build_constructor (atype, ctor_elts);
e1bd7296 4150 gassign *new_stmt
b731b390 4151 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4152 vec_oprnd0);
4153 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4154 vargs.safe_push (gimple_assign_lhs (new_stmt));
4155 }
4156 }
4157 }
4158 break;
4159 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4160 vargs.safe_push (op);
4161 break;
4162 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4163 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4164 if (j == 0)
4165 {
4166 gimple_seq stmts;
4167 arginfo[i].op
4168 = force_gimple_operand (arginfo[i].op, &stmts, true,
4169 NULL_TREE);
4170 if (stmts != NULL)
4171 {
4172 basic_block new_bb;
4173 edge pe = loop_preheader_edge (loop);
4174 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4175 gcc_assert (!new_bb);
4176 }
17b658af
JJ
4177 if (arginfo[i].simd_lane_linear)
4178 {
4179 vargs.safe_push (arginfo[i].op);
4180 break;
4181 }
b731b390 4182 tree phi_res = copy_ssa_name (op);
538dd0b7 4183 gphi *new_phi = create_phi_node (phi_res, loop->header);
4fbeb363 4184 loop_vinfo->add_stmt (new_phi);
0136f8f0
AH
4185 add_phi_arg (new_phi, arginfo[i].op,
4186 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4187 enum tree_code code
4188 = POINTER_TYPE_P (TREE_TYPE (op))
4189 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4190 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4191 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4192 widest_int cst
4193 = wi::mul (bestn->simdclone->args[i].linear_step,
4194 ncopies * nunits);
4195 tree tcst = wide_int_to_tree (type, cst);
b731b390 4196 tree phi_arg = copy_ssa_name (op);
e1bd7296 4197 gassign *new_stmt
0d0e4a03 4198 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4199 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4200 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4fbeb363 4201 loop_vinfo->add_stmt (new_stmt);
0136f8f0
AH
4202 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4203 UNKNOWN_LOCATION);
4204 arginfo[i].op = phi_res;
4205 vargs.safe_push (phi_res);
4206 }
4207 else
4208 {
4209 enum tree_code code
4210 = POINTER_TYPE_P (TREE_TYPE (op))
4211 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4212 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4213 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4214 widest_int cst
4215 = wi::mul (bestn->simdclone->args[i].linear_step,
4216 j * nunits);
4217 tree tcst = wide_int_to_tree (type, cst);
b731b390 4218 new_temp = make_ssa_name (TREE_TYPE (op));
e1bd7296
RS
4219 gassign *new_stmt
4220 = gimple_build_assign (new_temp, code,
4221 arginfo[i].op, tcst);
0136f8f0
AH
4222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4223 vargs.safe_push (new_temp);
4224 }
4225 break;
7adb26f2
JJ
4226 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4227 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4228 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4229 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4230 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4231 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4232 default:
4233 gcc_unreachable ();
4234 }
4235 }
4236
e1bd7296 4237 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
0136f8f0
AH
4238 if (vec_dest)
4239 {
cf1b2ba4 4240 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4241 if (ratype)
b731b390 4242 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4243 else if (simd_clone_subparts (vectype)
4244 == simd_clone_subparts (rtype))
e1bd7296 4245 new_temp = make_ssa_name (vec_dest, new_call);
0136f8f0 4246 else
e1bd7296
RS
4247 new_temp = make_ssa_name (rtype, new_call);
4248 gimple_call_set_lhs (new_call, new_temp);
0136f8f0 4249 }
e1bd7296
RS
4250 stmt_vec_info new_stmt_info
4251 = vect_finish_stmt_generation (stmt, new_call, gsi);
0136f8f0
AH
4252
4253 if (vec_dest)
4254 {
cf1b2ba4 4255 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4256 {
4257 unsigned int k, l;
73a699ae
RS
4258 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4259 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4260 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4261 gcc_assert ((k & (k - 1)) == 0);
4262 for (l = 0; l < k; l++)
4263 {
4264 tree t;
4265 if (ratype)
4266 {
4267 t = build_fold_addr_expr (new_temp);
4268 t = build2 (MEM_REF, vectype, t,
73a699ae 4269 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4270 }
4271 else
4272 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4273 bitsize_int (prec), bitsize_int (l * prec));
e1bd7296 4274 gimple *new_stmt
b731b390 4275 = gimple_build_assign (make_ssa_name (vectype), t);
e1bd7296
RS
4276 new_stmt_info
4277 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4278
0136f8f0 4279 if (j == 0 && l == 0)
e1bd7296
RS
4280 STMT_VINFO_VEC_STMT (stmt_info)
4281 = *vec_stmt = new_stmt_info;
0136f8f0 4282 else
e1bd7296 4283 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
0136f8f0 4284
e1bd7296 4285 prev_stmt_info = new_stmt_info;
0136f8f0
AH
4286 }
4287
4288 if (ratype)
3ba4ff41 4289 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4290 continue;
4291 }
cf1b2ba4 4292 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4293 {
cf1b2ba4
RS
4294 unsigned int k = (simd_clone_subparts (vectype)
4295 / simd_clone_subparts (rtype));
0136f8f0
AH
4296 gcc_assert ((k & (k - 1)) == 0);
4297 if ((j & (k - 1)) == 0)
4298 vec_alloc (ret_ctor_elts, k);
4299 if (ratype)
4300 {
cf1b2ba4 4301 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4302 for (m = 0; m < o; m++)
4303 {
4304 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4305 size_int (m), NULL_TREE, NULL_TREE);
e1bd7296 4306 gimple *new_stmt
b731b390 4307 = gimple_build_assign (make_ssa_name (rtype), tem);
e1bd7296
RS
4308 new_stmt_info
4309 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0136f8f0
AH
4310 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4311 gimple_assign_lhs (new_stmt));
4312 }
3ba4ff41 4313 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4314 }
4315 else
4316 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4317 if ((j & (k - 1)) != k - 1)
4318 continue;
4319 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
e1bd7296 4320 gimple *new_stmt
b731b390 4321 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
e1bd7296
RS
4322 new_stmt_info
4323 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0136f8f0
AH
4324
4325 if ((unsigned) j == k - 1)
e1bd7296 4326 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
0136f8f0 4327 else
e1bd7296 4328 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
0136f8f0 4329
e1bd7296 4330 prev_stmt_info = new_stmt_info;
0136f8f0
AH
4331 continue;
4332 }
4333 else if (ratype)
4334 {
4335 tree t = build_fold_addr_expr (new_temp);
4336 t = build2 (MEM_REF, vectype, t,
4337 build_int_cst (TREE_TYPE (t), 0));
e1bd7296 4338 gimple *new_stmt
b731b390 4339 = gimple_build_assign (make_ssa_name (vec_dest), t);
e1bd7296
RS
4340 new_stmt_info
4341 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41 4342 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4343 }
4344 }
4345
4346 if (j == 0)
e1bd7296 4347 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
0136f8f0 4348 else
e1bd7296 4349 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
0136f8f0 4350
e1bd7296 4351 prev_stmt_info = new_stmt_info;
0136f8f0
AH
4352 }
4353
4354 vargs.release ();
4355
4356 /* The call in STMT might prevent it from being removed in dce.
4357 We however cannot remove it here, due to the way the ssa name
4358 it defines is mapped to the new definition. So just replace
4359 rhs of the statement with something harmless. */
4360
4361 if (slp_node)
4362 return true;
4363
e1bd7296 4364 gimple *new_stmt;
0136f8f0
AH
4365 if (scalar_dest)
4366 {
4367 type = TREE_TYPE (scalar_dest);
4368 if (is_pattern_stmt_p (stmt_info))
10681ce8 4369 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
0136f8f0
AH
4370 else
4371 lhs = gimple_call_lhs (stmt);
4372 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4373 }
4374 else
4375 new_stmt = gimple_build_nop ();
4376 set_vinfo_for_stmt (new_stmt, stmt_info);
4377 set_vinfo_for_stmt (stmt, NULL);
4378 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4379 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4380 unlink_stmt_vdef (stmt);
4381
4382 return true;
4383}
4384
4385
ebfd146a
IR
4386/* Function vect_gen_widened_results_half
4387
4388    Create a vector stmt whose code, number of arguments, and result
b8698a0f 4389    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 4390    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
ebfd146a
IR
4391 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4392 needs to be created (DECL is a function-decl of a target-builtin).
4393 STMT is the original scalar stmt that we are vectorizing. */
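/* For instance, a widening operation is typically emitted as a
   VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR (or _LO/_HI target builtin)
   pair; each invocation of this helper produces one half of that pair.  */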
4394
355fe088 4395static gimple *
ebfd146a
IR
4396vect_gen_widened_results_half (enum tree_code code,
4397 tree decl,
4398 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4399 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4400 gimple *stmt)
b8698a0f 4401{
355fe088 4402 gimple *new_stmt;
b8698a0f
L
4403 tree new_temp;
4404
4405 /* Generate half of the widened result: */
4406 if (code == CALL_EXPR)
4407 {
4408 /* Target specific support */
ebfd146a
IR
4409 if (op_type == binary_op)
4410 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4411 else
4412 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4413 new_temp = make_ssa_name (vec_dest, new_stmt);
4414 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4415 }
4416 else
ebfd146a 4417 {
b8698a0f
L
4418 /* Generic support */
4419 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4420 if (op_type != binary_op)
4421 vec_oprnd1 = NULL;
0d0e4a03 4422 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4423 new_temp = make_ssa_name (vec_dest, new_stmt);
4424 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4425 }
ebfd146a
IR
4426 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4427
ebfd146a
IR
4428 return new_stmt;
4429}
4430
4a00c761
JJ
4431
4432/* Get vectorized definitions for loop-based vectorization. For the first
4433 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4434 scalar operand), and for the rest we get a copy with
4435 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4436 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4437 The vectors are collected into VEC_OPRNDS. */
4438
4439static void
355fe088 4440vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4441 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4442{
4443 tree vec_oprnd;
4444
4445 /* Get first vector operand. */
4446 /* All the vector operands except the very first one (that is scalar oprnd)
4447 are stmt copies. */
4448 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4449 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4450 else
4451 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4452
9771b263 4453 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4454
4455 /* Get second vector operand. */
4456 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4457 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4458
4459 *oprnd = vec_oprnd;
4460
4461 /* For conversion in multiple steps, continue to get operands
4462 recursively. */
4463 if (multi_step_cvt)
4464 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4465}
4466
4467
4468/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4469 For multi-step conversions store the resulting vectors and call the function
4470 recursively. */
4471
4472static void
9771b263 4473vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4474 int multi_step_cvt, gimple *stmt,
9771b263 4475 vec<tree> vec_dsts,
4a00c761
JJ
4476 gimple_stmt_iterator *gsi,
4477 slp_tree slp_node, enum tree_code code,
4478 stmt_vec_info *prev_stmt_info)
4479{
4480 unsigned int i;
4481 tree vop0, vop1, new_tmp, vec_dest;
4a00c761
JJ
4482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4483
9771b263 4484 vec_dest = vec_dsts.pop ();
4a00c761 4485
9771b263 4486 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4487 {
4488 /* Create demotion operation. */
9771b263
DN
4489 vop0 = (*vec_oprnds)[i];
4490 vop1 = (*vec_oprnds)[i + 1];
e1bd7296 4491 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4492 new_tmp = make_ssa_name (vec_dest, new_stmt);
4493 gimple_assign_set_lhs (new_stmt, new_tmp);
e1bd7296
RS
4494 stmt_vec_info new_stmt_info
4495 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
4496
4497 if (multi_step_cvt)
4498 /* Store the resulting vector for next recursive call. */
9771b263 4499 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4500 else
4501 {
4502 /* This is the last step of the conversion sequence. Store the
4503 vectors in SLP_NODE or in vector info of the scalar statement
4504 (or in STMT_VINFO_RELATED_STMT chain). */
4505 if (slp_node)
e1bd7296 4506 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4a00c761 4507 else
c689ce1e
RB
4508 {
4509 if (!*prev_stmt_info)
e1bd7296 4510 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
c689ce1e 4511 else
e1bd7296 4512 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4a00c761 4513
e1bd7296 4514 *prev_stmt_info = new_stmt_info;
c689ce1e 4515 }
4a00c761
JJ
4516 }
4517 }
4518
4519 /* For multi-step demotion operations we first generate demotion operations
4520 from the source type to the intermediate types, and then combine the
4521 results (stored in VEC_OPRNDS) in demotion operation to the destination
4522 type. */
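  /* For example, demoting four V4SI operand vectors to V16QI first packs
     them pairwise into two V8HI vectors and then packs those into a
     single V16QI vector in the recursive call below.  */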
4523 if (multi_step_cvt)
4524 {
4525 /* At each level of recursion we have half of the operands we had at the
4526 previous level. */
9771b263 4527 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4528 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4529 stmt, vec_dsts, gsi, slp_node,
4530 VEC_PACK_TRUNC_EXPR,
4531 prev_stmt_info);
4532 }
4533
9771b263 4534 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4535}
4536
4537
4538/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4539 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4540 the resulting vectors and call the function recursively. */
4541
4542static void
9771b263
DN
4543vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4544 vec<tree> *vec_oprnds1,
355fe088 4545 gimple *stmt, tree vec_dest,
4a00c761
JJ
4546 gimple_stmt_iterator *gsi,
4547 enum tree_code code1,
4548 enum tree_code code2, tree decl1,
4549 tree decl2, int op_type)
4550{
4551 int i;
4552 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4553 gimple *new_stmt1, *new_stmt2;
6e1aa848 4554 vec<tree> vec_tmp = vNULL;
4a00c761 4555
9771b263
DN
4556 vec_tmp.create (vec_oprnds0->length () * 2);
4557 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4558 {
4559 if (op_type == binary_op)
9771b263 4560 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4561 else
4562 vop1 = NULL_TREE;
4563
4564 /* Generate the two halves of promotion operation. */
4565 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4566 op_type, vec_dest, gsi, stmt);
4567 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4568 op_type, vec_dest, gsi, stmt);
4569 if (is_gimple_call (new_stmt1))
4570 {
4571 new_tmp1 = gimple_call_lhs (new_stmt1);
4572 new_tmp2 = gimple_call_lhs (new_stmt2);
4573 }
4574 else
4575 {
4576 new_tmp1 = gimple_assign_lhs (new_stmt1);
4577 new_tmp2 = gimple_assign_lhs (new_stmt2);
4578 }
4579
4580 /* Store the results for the next step. */
9771b263
DN
4581 vec_tmp.quick_push (new_tmp1);
4582 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4583 }
4584
689eaba3 4585 vec_oprnds0->release ();
4a00c761
JJ
4586 *vec_oprnds0 = vec_tmp;
4587}
4588
4589
b8698a0f
L
4590/* Check if STMT performs a conversion operation, that can be vectorized.
4591 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4592 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4593 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4594
4595static bool
355fe088 4596vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 4597 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 4598 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4599{
4600 tree vec_dest;
4601 tree scalar_dest;
4a00c761 4602 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4603 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4604 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4606 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4607 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4608 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4609 tree new_temp;
ebfd146a 4610 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4611 int ndts = 2;
ebfd146a 4612 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4613 poly_uint64 nunits_in;
4614 poly_uint64 nunits_out;
ebfd146a 4615 tree vectype_out, vectype_in;
4a00c761
JJ
4616 int ncopies, i, j;
4617 tree lhs_type, rhs_type;
ebfd146a 4618 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4619 vec<tree> vec_oprnds0 = vNULL;
4620 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4621 tree vop0;
4a00c761 4622 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4623 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4624 int multi_step_cvt = 0;
6e1aa848 4625 vec<tree> interm_types = vNULL;
4a00c761
JJ
4626 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4627 int op_type;
4a00c761 4628 unsigned short fltsz;
ebfd146a
IR
4629
4630 /* Is STMT a vectorizable conversion? */
4631
4a00c761 4632 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4633 return false;
4634
66c16fd9
RB
4635 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4636 && ! vec_stmt)
ebfd146a
IR
4637 return false;
4638
4639 if (!is_gimple_assign (stmt))
4640 return false;
4641
4642 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4643 return false;
4644
4645 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4646 if (!CONVERT_EXPR_CODE_P (code)
4647 && code != FIX_TRUNC_EXPR
4648 && code != FLOAT_EXPR
4649 && code != WIDEN_MULT_EXPR
4650 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4651 return false;
4652
4a00c761
JJ
4653 op_type = TREE_CODE_LENGTH (code);
4654
ebfd146a 4655 /* Check types of lhs and rhs. */
b690cc0f 4656 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4657 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4658 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4659
ebfd146a
IR
4660 op0 = gimple_assign_rhs1 (stmt);
4661 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4662
4663 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4664 && !((INTEGRAL_TYPE_P (lhs_type)
4665 && INTEGRAL_TYPE_P (rhs_type))
4666 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4667 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4668 return false;
4669
e6f5c25d
IE
4670 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4671 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4672 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4673 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4674 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4675 {
73fbfcad 4676 if (dump_enabled_p ())
78c60e3d 4677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4678 "type conversion to/from bit-precision unsupported."
4679 "\n");
4a00c761
JJ
4680 return false;
4681 }
4682
b690cc0f 4683 /* Check the operands of the operation. */
894dd753 4684 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
b690cc0f 4685 {
73fbfcad 4686 if (dump_enabled_p ())
78c60e3d 4687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4688 "use not simple.\n");
b690cc0f
RG
4689 return false;
4690 }
4a00c761
JJ
4691 if (op_type == binary_op)
4692 {
4693 bool ok;
4694
4695 op1 = gimple_assign_rhs2 (stmt);
4696 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4697 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4698 OP1. */
4699 if (CONSTANT_CLASS_P (op0))
894dd753 4700 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4a00c761 4701 else
894dd753 4702 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4a00c761
JJ
4703
4704 if (!ok)
4705 {
73fbfcad 4706 if (dump_enabled_p ())
78c60e3d 4707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4708 "use not simple.\n");
4a00c761
JJ
4709 return false;
4710 }
4711 }
4712
b690cc0f
RG
 4713 /* If op0 is an external or constant def, use a vector type of
4714 the same size as the output vector type. */
ebfd146a 4715 if (!vectype_in)
b690cc0f 4716 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4717 if (vec_stmt)
4718 gcc_assert (vectype_in);
4719 if (!vectype_in)
4720 {
73fbfcad 4721 if (dump_enabled_p ())
4a00c761 4722 {
78c60e3d
SS
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4724 "no vectype for scalar type ");
4725 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4726 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4727 }
7d8930a0
IR
4728
4729 return false;
4730 }
ebfd146a 4731
e6f5c25d
IE
4732 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4733 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4734 {
4735 if (dump_enabled_p ())
4736 {
4737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4738 "can't convert between boolean and non "
4739 "boolean vectors");
4740 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4741 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4742 }
4743
4744 return false;
4745 }
4746
b690cc0f
RG
4747 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4748 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4749 if (known_eq (nunits_out, nunits_in))
ebfd146a 4750 modifier = NONE;
062d5ccc
RS
4751 else if (multiple_p (nunits_out, nunits_in))
4752 modifier = NARROW;
ebfd146a 4753 else
062d5ccc
RS
4754 {
4755 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4756 modifier = WIDEN;
4757 }
ebfd146a 4758
ff802fa1
IR
4759 /* Multiple types in SLP are handled by creating the appropriate number of
4760 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4761 case of SLP. */
fce57248 4762 if (slp_node)
ebfd146a 4763 ncopies = 1;
4a00c761 4764 else if (modifier == NARROW)
e8f142e2 4765 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4766 else
e8f142e2 4767 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4768
ebfd146a
IR
4769 /* Sanity check: make sure that at least one copy of the vectorized stmt
4770 needs to be generated. */
4771 gcc_assert (ncopies >= 1);
4772
16d22000
RS
4773 bool found_mode = false;
4774 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4775 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4776 opt_scalar_mode rhs_mode_iter;
b397965c 4777
ebfd146a 4778 /* Supportable by target? */
4a00c761 4779 switch (modifier)
ebfd146a 4780 {
4a00c761
JJ
4781 case NONE:
4782 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4783 return false;
4784 if (supportable_convert_operation (code, vectype_out, vectype_in,
4785 &decl1, &code1))
4786 break;
4787 /* FALLTHRU */
4788 unsupported:
73fbfcad 4789 if (dump_enabled_p ())
78c60e3d 4790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4791 "conversion not supported by target.\n");
ebfd146a 4792 return false;
ebfd146a 4793
4a00c761
JJ
4794 case WIDEN:
4795 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4796 &code1, &code2, &multi_step_cvt,
4797 &interm_types))
4a00c761
JJ
4798 {
4799 /* Binary widening operation can only be supported directly by the
4800 architecture. */
4801 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4802 break;
4803 }
4804
4805 if (code != FLOAT_EXPR
b397965c 4806 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4807 goto unsupported;
4808
b397965c 4809 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4810 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4811 {
16d22000 4812 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4813 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4814 break;
4815
4a00c761
JJ
4816 cvt_type
4817 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4818 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4819 if (cvt_type == NULL_TREE)
4820 goto unsupported;
4821
4822 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4823 {
4824 if (!supportable_convert_operation (code, vectype_out,
4825 cvt_type, &decl1, &codecvt1))
4826 goto unsupported;
4827 }
4828 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4829 cvt_type, &codecvt1,
4830 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4831 &interm_types))
4832 continue;
4833 else
4834 gcc_assert (multi_step_cvt == 0);
4835
4836 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4837 vectype_in, &code1, &code2,
4838 &multi_step_cvt, &interm_types))
16d22000
RS
4839 {
4840 found_mode = true;
4841 break;
4842 }
4a00c761
JJ
4843 }
4844
16d22000 4845 if (!found_mode)
4a00c761
JJ
4846 goto unsupported;
4847
4848 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4849 codecvt2 = ERROR_MARK;
4850 else
4851 {
4852 multi_step_cvt++;
9771b263 4853 interm_types.safe_push (cvt_type);
4a00c761
JJ
4854 cvt_type = NULL_TREE;
4855 }
4856 break;
4857
4858 case NARROW:
4859 gcc_assert (op_type == unary_op);
4860 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4861 &code1, &multi_step_cvt,
4862 &interm_types))
4863 break;
4864
4865 if (code != FIX_TRUNC_EXPR
b397965c 4866 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4867 goto unsupported;
4868
4a00c761
JJ
4869 cvt_type
4870 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4871 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4872 if (cvt_type == NULL_TREE)
4873 goto unsupported;
4874 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4875 &decl1, &codecvt1))
4876 goto unsupported;
4877 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4878 &code1, &multi_step_cvt,
4879 &interm_types))
4880 break;
4881 goto unsupported;
4882
4883 default:
4884 gcc_unreachable ();
ebfd146a
IR
4885 }
4886
4887 if (!vec_stmt) /* transformation not required. */
4888 {
adac3a68 4889 DUMP_VECT_SCOPE ("vectorizable_conversion");
4a00c761 4890 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4891 {
4892 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4893 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4894 cost_vec);
8bd37302 4895 }
4a00c761
JJ
4896 else if (modifier == NARROW)
4897 {
4898 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4899 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4900 cost_vec);
4a00c761
JJ
4901 }
4902 else
4903 {
4904 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4905 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4906 cost_vec);
4a00c761 4907 }
9771b263 4908 interm_types.release ();
ebfd146a
IR
4909 return true;
4910 }
4911
67b8dbac 4912 /* Transform. */
73fbfcad 4913 if (dump_enabled_p ())
78c60e3d 4914 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4915 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4916
4a00c761
JJ
4917 if (op_type == binary_op)
4918 {
4919 if (CONSTANT_CLASS_P (op0))
4920 op0 = fold_convert (TREE_TYPE (op1), op0);
4921 else if (CONSTANT_CLASS_P (op1))
4922 op1 = fold_convert (TREE_TYPE (op0), op1);
4923 }
4924
4925 /* In case of multi-step conversion, we first generate conversion operations
 4926 to the intermediate types, and then from those types to the final one.
 4927 We create vector destinations for the intermediate types (TYPES) received
4928 from supportable_*_operation, and store them in the correct order
4929 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4930 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4931 vec_dest = vect_create_destination_var (scalar_dest,
4932 (cvt_type && modifier == WIDEN)
4933 ? cvt_type : vectype_out);
9771b263 4934 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4935
4936 if (multi_step_cvt)
4937 {
9771b263
DN
4938 for (i = interm_types.length () - 1;
4939 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4940 {
4941 vec_dest = vect_create_destination_var (scalar_dest,
4942 intermediate_type);
9771b263 4943 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4944 }
4945 }
ebfd146a 4946
4a00c761 4947 if (cvt_type)
82294ec1
JJ
4948 vec_dest = vect_create_destination_var (scalar_dest,
4949 modifier == WIDEN
4950 ? vectype_out : cvt_type);
4a00c761
JJ
4951
4952 if (!slp_node)
4953 {
30862efc 4954 if (modifier == WIDEN)
4a00c761 4955 {
c3284718 4956 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4957 if (op_type == binary_op)
9771b263 4958 vec_oprnds1.create (1);
4a00c761 4959 }
30862efc 4960 else if (modifier == NARROW)
9771b263
DN
4961 vec_oprnds0.create (
4962 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4963 }
4964 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4965 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4966
4a00c761 4967 last_oprnd = op0;
ebfd146a
IR
4968 prev_stmt_info = NULL;
4969 switch (modifier)
4970 {
4971 case NONE:
4972 for (j = 0; j < ncopies; j++)
4973 {
ebfd146a 4974 if (j == 0)
306b0c92 4975 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4976 else
4977 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4978
9771b263 4979 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761 4980 {
e1bd7296 4981 stmt_vec_info new_stmt_info;
4a00c761
JJ
4982 /* Arguments are ready, create the new vector stmt. */
4983 if (code1 == CALL_EXPR)
4984 {
e1bd7296 4985 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
4986 new_temp = make_ssa_name (vec_dest, new_stmt);
4987 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296
RS
4988 new_stmt_info
4989 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
4990 }
4991 else
4992 {
4993 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
e1bd7296
RS
4994 gassign *new_stmt
4995 = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4996 new_temp = make_ssa_name (vec_dest, new_stmt);
4997 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296
RS
4998 new_stmt_info
4999 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
5000 }
5001
4a00c761 5002 if (slp_node)
e1bd7296 5003 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
225ce44b
RB
5004 else
5005 {
5006 if (!prev_stmt_info)
e1bd7296
RS
5007 STMT_VINFO_VEC_STMT (stmt_info)
5008 = *vec_stmt = new_stmt_info;
225ce44b 5009 else
e1bd7296
RS
5010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5011 prev_stmt_info = new_stmt_info;
225ce44b 5012 }
4a00c761 5013 }
ebfd146a
IR
5014 }
5015 break;
5016
5017 case WIDEN:
5018 /* In case the vectorization factor (VF) is bigger than the number
5019 of elements that we can fit in a vectype (nunits), we have to
 5020 generate more than one vector stmt, i.e., we need to "unroll"
5021 the vector stmt by a factor VF/nunits. */
5022 for (j = 0; j < ncopies; j++)
5023 {
4a00c761 5024 /* Handle uses. */
ebfd146a 5025 if (j == 0)
4a00c761
JJ
5026 {
5027 if (slp_node)
5028 {
5029 if (code == WIDEN_LSHIFT_EXPR)
5030 {
5031 unsigned int k;
ebfd146a 5032
4a00c761
JJ
5033 vec_oprnd1 = op1;
5034 /* Store vec_oprnd1 for every vector stmt to be created
5035 for SLP_NODE. We check during the analysis that all
5036 the shift arguments are the same. */
5037 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5038 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5039
5040 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5041 slp_node);
4a00c761
JJ
5042 }
5043 else
5044 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 5045 &vec_oprnds1, slp_node);
4a00c761
JJ
5046 }
5047 else
5048 {
81c40241 5049 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 5050 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5051 if (op_type == binary_op)
5052 {
5053 if (code == WIDEN_LSHIFT_EXPR)
5054 vec_oprnd1 = op1;
5055 else
81c40241 5056 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 5057 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5058 }
5059 }
5060 }
ebfd146a 5061 else
4a00c761
JJ
5062 {
5063 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
5064 vec_oprnds0.truncate (0);
5065 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5066 if (op_type == binary_op)
5067 {
5068 if (code == WIDEN_LSHIFT_EXPR)
5069 vec_oprnd1 = op1;
5070 else
5071 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5072 vec_oprnd1);
9771b263
DN
5073 vec_oprnds1.truncate (0);
5074 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5075 }
5076 }
ebfd146a 5077
4a00c761
JJ
5078 /* Arguments are ready. Create the new vector stmts. */
5079 for (i = multi_step_cvt; i >= 0; i--)
5080 {
9771b263 5081 tree this_dest = vec_dsts[i];
4a00c761
JJ
5082 enum tree_code c1 = code1, c2 = code2;
5083 if (i == 0 && codecvt2 != ERROR_MARK)
5084 {
5085 c1 = codecvt1;
5086 c2 = codecvt2;
5087 }
5088 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5089 &vec_oprnds1,
5090 stmt, this_dest, gsi,
5091 c1, c2, decl1, decl2,
5092 op_type);
5093 }
5094
9771b263 5095 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761 5096 {
e1bd7296 5097 stmt_vec_info new_stmt_info;
4a00c761
JJ
5098 if (cvt_type)
5099 {
5100 if (codecvt1 == CALL_EXPR)
5101 {
e1bd7296 5102 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
5103 new_temp = make_ssa_name (vec_dest, new_stmt);
5104 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296
RS
5105 new_stmt_info
5106 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
5107 }
5108 else
5109 {
5110 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5111 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
5112 gassign *new_stmt
5113 = gimple_build_assign (new_temp, codecvt1, vop0);
5114 new_stmt_info
5115 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761 5116 }
4a00c761
JJ
5117 }
5118 else
e1bd7296 5119 new_stmt_info = vinfo->lookup_def (vop0);
4a00c761
JJ
5120
5121 if (slp_node)
e1bd7296 5122 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4a00c761 5123 else
c689ce1e
RB
5124 {
5125 if (!prev_stmt_info)
e1bd7296 5126 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
c689ce1e 5127 else
e1bd7296
RS
5128 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5129 prev_stmt_info = new_stmt_info;
c689ce1e 5130 }
4a00c761 5131 }
ebfd146a 5132 }
4a00c761
JJ
5133
5134 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5135 break;
5136
5137 case NARROW:
5138 /* In case the vectorization factor (VF) is bigger than the number
5139 of elements that we can fit in a vectype (nunits), we have to
 5140 generate more than one vector stmt, i.e., we need to "unroll"
5141 the vector stmt by a factor VF/nunits. */
5142 for (j = 0; j < ncopies; j++)
5143 {
5144 /* Handle uses. */
4a00c761
JJ
5145 if (slp_node)
5146 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5147 slp_node);
ebfd146a
IR
5148 else
5149 {
9771b263 5150 vec_oprnds0.truncate (0);
4a00c761
JJ
5151 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5152 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5153 }
5154
4a00c761
JJ
5155 /* Arguments are ready. Create the new vector stmts. */
5156 if (cvt_type)
9771b263 5157 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5158 {
5159 if (codecvt1 == CALL_EXPR)
5160 {
e1bd7296 5161 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
5162 new_temp = make_ssa_name (vec_dest, new_stmt);
5163 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296 5164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
5165 }
5166 else
5167 {
5168 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5169 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
5170 gassign *new_stmt
5171 = gimple_build_assign (new_temp, codecvt1, vop0);
5172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761 5173 }
ebfd146a 5174
9771b263 5175 vec_oprnds0[i] = new_temp;
4a00c761 5176 }
ebfd146a 5177
4a00c761
JJ
5178 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5179 stmt, vec_dsts, gsi,
5180 slp_node, code1,
5181 &prev_stmt_info);
ebfd146a
IR
5182 }
5183
5184 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5185 break;
ebfd146a
IR
5186 }
5187
9771b263
DN
5188 vec_oprnds0.release ();
5189 vec_oprnds1.release ();
9771b263 5190 interm_types.release ();
ebfd146a
IR
5191
5192 return true;
5193}
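/* Editorial example, not part of tree-vect-stmts.c: two conversions that
   vectorizable_conversion analyzes.  The first is a NONE-modifier
   FLOAT_EXPR (int and float have the same width, so NUNITS_IN equals
   NUNITS_OUT).  The second is a WIDEN FLOAT_EXPR; on many targets there
   is no direct short -> float vector conversion, so the operands are
   first promoted to int (the CVT_TYPE / INTERM_TYPES machinery above)
   and then converted.  Function names are illustrative only.  */

void
int_to_float (float *restrict dst, const int *restrict src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (float) src[i];
}

void
short_to_float (float *restrict dst, const short *restrict src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (float) src[i];
}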
ff802fa1
IR
5194
5195
ebfd146a
IR
5196/* Function vectorizable_assignment.
5197
b8698a0f
L
5198 Check if STMT performs an assignment (copy) that can be vectorized.
5199 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
 5200 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5201 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5202
5203static bool
355fe088 5204vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 5205 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5206 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5207{
5208 tree vec_dest;
5209 tree scalar_dest;
5210 tree op;
5211 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5213 tree new_temp;
4fc5ebf1
JG
5214 enum vect_def_type dt[1] = {vect_unknown_def_type};
5215 int ndts = 1;
ebfd146a 5216 int ncopies;
f18b55bd 5217 int i, j;
6e1aa848 5218 vec<tree> vec_oprnds = vNULL;
ebfd146a 5219 tree vop;
a70d6342 5220 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5221 vec_info *vinfo = stmt_info->vinfo;
f18b55bd 5222 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5223 enum tree_code code;
5224 tree vectype_in;
ebfd146a 5225
a70d6342 5226 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5227 return false;
5228
66c16fd9
RB
5229 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5230 && ! vec_stmt)
ebfd146a
IR
5231 return false;
5232
5233 /* Is vectorizable assignment? */
5234 if (!is_gimple_assign (stmt))
5235 return false;
5236
5237 scalar_dest = gimple_assign_lhs (stmt);
5238 if (TREE_CODE (scalar_dest) != SSA_NAME)
5239 return false;
5240
fde9c428 5241 code = gimple_assign_rhs_code (stmt);
ebfd146a 5242 if (gimple_assign_single_p (stmt)
fde9c428
RG
5243 || code == PAREN_EXPR
5244 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5245 op = gimple_assign_rhs1 (stmt);
5246 else
5247 return false;
5248
7b7ec6c5
RG
5249 if (code == VIEW_CONVERT_EXPR)
5250 op = TREE_OPERAND (op, 0);
5251
465c8c19 5252 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5253 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5254
5255 /* Multiple types in SLP are handled by creating the appropriate number of
5256 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5257 case of SLP. */
fce57248 5258 if (slp_node)
465c8c19
JJ
5259 ncopies = 1;
5260 else
e8f142e2 5261 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5262
5263 gcc_assert (ncopies >= 1);
5264
894dd753 5265 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
ebfd146a 5266 {
73fbfcad 5267 if (dump_enabled_p ())
78c60e3d 5268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5269 "use not simple.\n");
ebfd146a
IR
5270 return false;
5271 }
5272
fde9c428
RG
5273 /* We can handle NOP_EXPR conversions that do not change the number
5274 of elements or the vector size. */
7b7ec6c5
RG
5275 if ((CONVERT_EXPR_CODE_P (code)
5276 || code == VIEW_CONVERT_EXPR)
fde9c428 5277 && (!vectype_in
928686b1 5278 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5279 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5280 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5281 return false;
5282
7b7b1813
RG
5283 /* We do not handle bit-precision changes. */
5284 if ((CONVERT_EXPR_CODE_P (code)
5285 || code == VIEW_CONVERT_EXPR)
5286 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5287 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5288 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5289 /* But a conversion that does not change the bit-pattern is ok. */
5290 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5291 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5292 && TYPE_UNSIGNED (TREE_TYPE (op)))
5293 /* Conversion between boolean types of different sizes is
 5294 a simple assignment in case their vectypes are the same
5295 boolean vectors. */
5296 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5297 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5298 {
73fbfcad 5299 if (dump_enabled_p ())
78c60e3d
SS
5300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5301 "type conversion to/from bit-precision "
e645e942 5302 "unsupported.\n");
7b7b1813
RG
5303 return false;
5304 }
5305
ebfd146a
IR
5306 if (!vec_stmt) /* transformation not required. */
5307 {
5308 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
adac3a68 5309 DUMP_VECT_SCOPE ("vectorizable_assignment");
68435eb2 5310 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5311 return true;
5312 }
5313
67b8dbac 5314 /* Transform. */
73fbfcad 5315 if (dump_enabled_p ())
e645e942 5316 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5317
5318 /* Handle def. */
5319 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5320
5321 /* Handle use. */
f18b55bd 5322 for (j = 0; j < ncopies; j++)
ebfd146a 5323 {
f18b55bd
IR
5324 /* Handle uses. */
5325 if (j == 0)
306b0c92 5326 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5327 else
5328 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5329
 5330 /* Arguments are ready. Create the new vector stmt. */
e1bd7296 5331 stmt_vec_info new_stmt_info = NULL;
9771b263 5332 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5333 {
7b7ec6c5
RG
5334 if (CONVERT_EXPR_CODE_P (code)
5335 || code == VIEW_CONVERT_EXPR)
4a73490d 5336 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
e1bd7296 5337 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
f18b55bd
IR
5338 new_temp = make_ssa_name (vec_dest, new_stmt);
5339 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 5340 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
f18b55bd 5341 if (slp_node)
e1bd7296 5342 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
f18b55bd 5343 }
ebfd146a
IR
5344
5345 if (slp_node)
f18b55bd
IR
5346 continue;
5347
5348 if (j == 0)
e1bd7296 5349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
f18b55bd 5350 else
e1bd7296 5351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
f18b55bd 5352
e1bd7296 5353 prev_stmt_info = new_stmt_info;
f18b55bd 5354 }
b8698a0f 5355
9771b263 5356 vec_oprnds.release ();
ebfd146a
IR
5357 return true;
5358}
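/* Editorial example, not part of tree-vect-stmts.c: a conversion that ends
   up in vectorizable_assignment rather than vectorizable_conversion.
   int -> unsigned int changes neither the number of elements nor the
   vector size, so each copy is emitted as a plain assignment with the
   operand wrapped in a VIEW_CONVERT_EXPR.  The function name is
   illustrative only.  */

void
cast_to_unsigned (unsigned int *restrict dst, const int *restrict src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (unsigned int) src[i];
}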
5359
9dc3f7de 5360
1107f3ae
IR
5361/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
 5362 either as a shift by a scalar or by a vector. */
5363
5364bool
5365vect_supportable_shift (enum tree_code code, tree scalar_type)
5366{
5367
ef4bddc2 5368 machine_mode vec_mode;
1107f3ae
IR
5369 optab optab;
5370 int icode;
5371 tree vectype;
5372
5373 vectype = get_vectype_for_scalar_type (scalar_type);
5374 if (!vectype)
5375 return false;
5376
5377 optab = optab_for_tree_code (code, vectype, optab_scalar);
5378 if (!optab
5379 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5380 {
5381 optab = optab_for_tree_code (code, vectype, optab_vector);
5382 if (!optab
5383 || (optab_handler (optab, TYPE_MODE (vectype))
5384 == CODE_FOR_nothing))
5385 return false;
5386 }
5387
5388 vec_mode = TYPE_MODE (vectype);
5389 icode = (int) optab_handler (optab, vec_mode);
5390 if (icode == CODE_FOR_nothing)
5391 return false;
5392
5393 return true;
5394}
5395
5396
9dc3f7de
IR
5397/* Function vectorizable_shift.
5398
5399 Check if STMT performs a shift operation that can be vectorized.
5400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
 5401 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5403
5404static bool
355fe088 5405vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 5406 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5407 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5408{
5409 tree vec_dest;
5410 tree scalar_dest;
5411 tree op0, op1 = NULL;
5412 tree vec_oprnd1 = NULL_TREE;
5413 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5414 tree vectype;
5415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5416 enum tree_code code;
ef4bddc2 5417 machine_mode vec_mode;
9dc3f7de
IR
5418 tree new_temp;
5419 optab optab;
5420 int icode;
ef4bddc2 5421 machine_mode optab_op2_mode;
9dc3f7de 5422 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5423 int ndts = 2;
9dc3f7de 5424 stmt_vec_info prev_stmt_info;
928686b1
RS
5425 poly_uint64 nunits_in;
5426 poly_uint64 nunits_out;
9dc3f7de 5427 tree vectype_out;
cede2577 5428 tree op1_vectype;
9dc3f7de
IR
5429 int ncopies;
5430 int j, i;
6e1aa848
DN
5431 vec<tree> vec_oprnds0 = vNULL;
5432 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5433 tree vop0, vop1;
5434 unsigned int k;
49eab32e 5435 bool scalar_shift_arg = true;
9dc3f7de 5436 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5437 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5438
5439 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5440 return false;
5441
66c16fd9
RB
5442 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5443 && ! vec_stmt)
9dc3f7de
IR
5444 return false;
5445
5446 /* Is STMT a vectorizable binary/unary operation? */
5447 if (!is_gimple_assign (stmt))
5448 return false;
5449
5450 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5451 return false;
5452
5453 code = gimple_assign_rhs_code (stmt);
5454
5455 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5456 || code == RROTATE_EXPR))
5457 return false;
5458
5459 scalar_dest = gimple_assign_lhs (stmt);
5460 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5461 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5462 {
73fbfcad 5463 if (dump_enabled_p ())
78c60e3d 5464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5465 "bit-precision shifts not supported.\n");
7b7b1813
RG
5466 return false;
5467 }
9dc3f7de
IR
5468
5469 op0 = gimple_assign_rhs1 (stmt);
894dd753 5470 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
9dc3f7de 5471 {
73fbfcad 5472 if (dump_enabled_p ())
78c60e3d 5473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5474 "use not simple.\n");
9dc3f7de
IR
5475 return false;
5476 }
 5477 /* If op0 is an external or constant def, use a vector type with
5478 the same size as the output vector type. */
5479 if (!vectype)
5480 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5481 if (vec_stmt)
5482 gcc_assert (vectype);
5483 if (!vectype)
5484 {
73fbfcad 5485 if (dump_enabled_p ())
78c60e3d 5486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5487 "no vectype for scalar type\n");
9dc3f7de
IR
5488 return false;
5489 }
5490
5491 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5492 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5493 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5494 return false;
5495
5496 op1 = gimple_assign_rhs2 (stmt);
fef96d8e
RS
5497 stmt_vec_info op1_def_stmt_info;
5498 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5499 &op1_def_stmt_info))
9dc3f7de 5500 {
73fbfcad 5501 if (dump_enabled_p ())
78c60e3d 5502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5503 "use not simple.\n");
9dc3f7de
IR
5504 return false;
5505 }
5506
9dc3f7de
IR
5507 /* Multiple types in SLP are handled by creating the appropriate number of
5508 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5509 case of SLP. */
fce57248 5510 if (slp_node)
9dc3f7de
IR
5511 ncopies = 1;
5512 else
e8f142e2 5513 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5514
5515 gcc_assert (ncopies >= 1);
5516
 5517 /* Determine whether the shift amount is a vector or a scalar. If the
5518 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5519
dbfa87aa
YR
5520 if ((dt[1] == vect_internal_def
5521 || dt[1] == vect_induction_def)
5522 && !slp_node)
49eab32e
JJ
5523 scalar_shift_arg = false;
5524 else if (dt[1] == vect_constant_def
5525 || dt[1] == vect_external_def
5526 || dt[1] == vect_internal_def)
5527 {
 5528 /* In SLP, we need to check whether the shift count is the same;
 5529 in loops, if it is a constant or invariant, it is always
 5530 a scalar shift. */
5531 if (slp_node)
5532 {
b9787581
RS
5533 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5534 stmt_vec_info slpstmt_info;
49eab32e 5535
b9787581
RS
5536 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5537 {
5538 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5539 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5540 scalar_shift_arg = false;
5541 }
49eab32e 5542 }
60d393e8
RB
5543
 5544 /* If the shift amount is computed by a pattern stmt, we cannot
 5545 use the scalar amount directly; give up and use a vector
 5546 shift. */
fef96d8e
RS
5547 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5548 scalar_shift_arg = false;
49eab32e
JJ
5549 }
5550 else
5551 {
73fbfcad 5552 if (dump_enabled_p ())
78c60e3d 5553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5554 "operand mode requires invariant argument.\n");
49eab32e
JJ
5555 return false;
5556 }
5557
9dc3f7de 5558 /* Vector shifted by vector. */
49eab32e 5559 if (!scalar_shift_arg)
9dc3f7de
IR
5560 {
5561 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5562 if (dump_enabled_p ())
78c60e3d 5563 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5564 "vector/vector shift/rotate found.\n");
78c60e3d 5565
aa948027
JJ
5566 if (!op1_vectype)
5567 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5568 if (op1_vectype == NULL_TREE
5569 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5570 {
73fbfcad 5571 if (dump_enabled_p ())
78c60e3d
SS
5572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5573 "unusable type for last operand in"
e645e942 5574 " vector/vector shift/rotate.\n");
cede2577
JJ
5575 return false;
5576 }
9dc3f7de
IR
5577 }
 5578 /* See if the machine has a vector shifted by scalar insn and, if not,
 5579 see if it has a vector shifted by vector insn. */
49eab32e 5580 else
9dc3f7de
IR
5581 {
5582 optab = optab_for_tree_code (code, vectype, optab_scalar);
5583 if (optab
5584 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5585 {
73fbfcad 5586 if (dump_enabled_p ())
78c60e3d 5587 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5588 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5589 }
5590 else
5591 {
5592 optab = optab_for_tree_code (code, vectype, optab_vector);
5593 if (optab
5594 && (optab_handler (optab, TYPE_MODE (vectype))
5595 != CODE_FOR_nothing))
5596 {
49eab32e
JJ
5597 scalar_shift_arg = false;
5598
73fbfcad 5599 if (dump_enabled_p ())
78c60e3d 5600 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5601 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5602
5603 /* Unlike the other binary operators, shifts/rotates have
 5604 an int rhs instead of one of the same type as the lhs,
5605 so make sure the scalar is the right type if we are
aa948027 5606 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5607 if (dt[1] == vect_constant_def)
5608 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5609 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5610 TREE_TYPE (op1)))
5611 {
5612 if (slp_node
5613 && TYPE_MODE (TREE_TYPE (vectype))
5614 != TYPE_MODE (TREE_TYPE (op1)))
5615 {
73fbfcad 5616 if (dump_enabled_p ())
78c60e3d
SS
5617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5618 "unusable type for last operand in"
e645e942 5619 " vector/vector shift/rotate.\n");
21c0a521 5620 return false;
aa948027
JJ
5621 }
5622 if (vec_stmt && !slp_node)
5623 {
5624 op1 = fold_convert (TREE_TYPE (vectype), op1);
5625 op1 = vect_init_vector (stmt, op1,
5626 TREE_TYPE (vectype), NULL);
5627 }
5628 }
9dc3f7de
IR
5629 }
5630 }
5631 }
9dc3f7de
IR
5632
5633 /* Supportable by target? */
5634 if (!optab)
5635 {
73fbfcad 5636 if (dump_enabled_p ())
78c60e3d 5637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5638 "no optab.\n");
9dc3f7de
IR
5639 return false;
5640 }
5641 vec_mode = TYPE_MODE (vectype);
5642 icode = (int) optab_handler (optab, vec_mode);
5643 if (icode == CODE_FOR_nothing)
5644 {
73fbfcad 5645 if (dump_enabled_p ())
78c60e3d 5646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5647 "op not supported by target.\n");
9dc3f7de 5648 /* Check only during analysis. */
cf098191 5649 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5650 || (!vec_stmt
5651 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5652 return false;
73fbfcad 5653 if (dump_enabled_p ())
e645e942
TJ
5654 dump_printf_loc (MSG_NOTE, vect_location,
5655 "proceeding using word mode.\n");
9dc3f7de
IR
5656 }
5657
5658 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5659 if (!vec_stmt
5660 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5661 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5662 {
73fbfcad 5663 if (dump_enabled_p ())
78c60e3d 5664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5665 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5666 return false;
5667 }
5668
5669 if (!vec_stmt) /* transformation not required. */
5670 {
5671 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
adac3a68 5672 DUMP_VECT_SCOPE ("vectorizable_shift");
68435eb2 5673 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5674 return true;
5675 }
5676
67b8dbac 5677 /* Transform. */
9dc3f7de 5678
73fbfcad 5679 if (dump_enabled_p ())
78c60e3d 5680 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5681 "transform binary/unary operation.\n");
9dc3f7de
IR
5682
5683 /* Handle def. */
5684 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5685
9dc3f7de
IR
5686 prev_stmt_info = NULL;
5687 for (j = 0; j < ncopies; j++)
5688 {
5689 /* Handle uses. */
5690 if (j == 0)
5691 {
5692 if (scalar_shift_arg)
5693 {
5694 /* Vector shl and shr insn patterns can be defined with scalar
5695 operand 2 (shift operand). In this case, use constant or loop
5696 invariant op1 directly, without extending it to vector mode
5697 first. */
5698 optab_op2_mode = insn_data[icode].operand[2].mode;
5699 if (!VECTOR_MODE_P (optab_op2_mode))
5700 {
73fbfcad 5701 if (dump_enabled_p ())
78c60e3d 5702 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5703 "operand 1 using scalar mode.\n");
9dc3f7de 5704 vec_oprnd1 = op1;
8930f723 5705 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5706 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5707 if (slp_node)
5708 {
5709 /* Store vec_oprnd1 for every vector stmt to be created
5710 for SLP_NODE. We check during the analysis that all
5711 the shift arguments are the same.
5712 TODO: Allow different constants for different vector
5713 stmts generated for an SLP instance. */
5714 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5715 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5716 }
5717 }
5718 }
5719
 5720 /* vec_oprnd1 is available if operand 1 should be of a scalar type
 5721 (a special case for certain kinds of vector shifts); otherwise,
5722 operand 1 should be of a vector type (the usual case). */
5723 if (vec_oprnd1)
5724 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5725 slp_node);
9dc3f7de
IR
5726 else
5727 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5728 slp_node);
9dc3f7de
IR
5729 }
5730 else
5731 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5732
5733 /* Arguments are ready. Create the new vector stmt. */
e1bd7296 5734 stmt_vec_info new_stmt_info = NULL;
9771b263 5735 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5736 {
9771b263 5737 vop1 = vec_oprnds1[i];
e1bd7296 5738 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5739 new_temp = make_ssa_name (vec_dest, new_stmt);
5740 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 5741 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
9dc3f7de 5742 if (slp_node)
e1bd7296 5743 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9dc3f7de
IR
5744 }
5745
5746 if (slp_node)
5747 continue;
5748
5749 if (j == 0)
e1bd7296 5750 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9dc3f7de 5751 else
e1bd7296
RS
5752 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5753 prev_stmt_info = new_stmt_info;
9dc3f7de
IR
5754 }
5755
9771b263
DN
5756 vec_oprnds0.release ();
5757 vec_oprnds1.release ();
9dc3f7de
IR
5758
5759 return true;
5760}
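/* Editorial example, not part of tree-vect-stmts.c: the two shift shapes
   distinguished above.  In the first loop the shift amount is loop
   invariant, so SCALAR_SHIFT_ARG stays true and the vector-shifted-by-scalar
   optab can be used; in the second loop every element has its own count,
   which requires the vector-shifted-by-vector optab.  Function names are
   illustrative only.  */

void
shift_by_scalar (unsigned int *restrict a, int s, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= s;
}

void
shift_by_vector (unsigned int *restrict a, const int *restrict s, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= s[i];
}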
5761
5762
ebfd146a
IR
5763/* Function vectorizable_operation.
5764
16949072
RG
5765 Check if STMT performs a binary, unary or ternary operation that can
5766 be vectorized.
b8698a0f 5767 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
 5768 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5769 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5770
5771static bool
355fe088 5772vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 5773 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5774 stmt_vector_for_cost *cost_vec)
ebfd146a 5775{
00f07b86 5776 tree vec_dest;
ebfd146a 5777 tree scalar_dest;
16949072 5778 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5779 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5780 tree vectype;
ebfd146a 5781 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5782 enum tree_code code, orig_code;
ef4bddc2 5783 machine_mode vec_mode;
ebfd146a
IR
5784 tree new_temp;
5785 int op_type;
00f07b86 5786 optab optab;
523ba738 5787 bool target_support_p;
16949072
RG
5788 enum vect_def_type dt[3]
5789 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5790 int ndts = 3;
ebfd146a 5791 stmt_vec_info prev_stmt_info;
928686b1
RS
5792 poly_uint64 nunits_in;
5793 poly_uint64 nunits_out;
ebfd146a
IR
5794 tree vectype_out;
5795 int ncopies;
5796 int j, i;
6e1aa848
DN
5797 vec<tree> vec_oprnds0 = vNULL;
5798 vec<tree> vec_oprnds1 = vNULL;
5799 vec<tree> vec_oprnds2 = vNULL;
16949072 5800 tree vop0, vop1, vop2;
a70d6342 5801 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5802 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5803
a70d6342 5804 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5805 return false;
5806
66c16fd9
RB
5807 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5808 && ! vec_stmt)
ebfd146a
IR
5809 return false;
5810
5811 /* Is STMT a vectorizable binary/unary operation? */
5812 if (!is_gimple_assign (stmt))
5813 return false;
5814
5815 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5816 return false;
5817
0eb952ea 5818 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5819
1af4ebf5
MG
5820 /* For pointer addition and subtraction, we should use the normal
5821 plus and minus for the vector operation. */
ebfd146a
IR
5822 if (code == POINTER_PLUS_EXPR)
5823 code = PLUS_EXPR;
1af4ebf5
MG
5824 if (code == POINTER_DIFF_EXPR)
5825 code = MINUS_EXPR;
ebfd146a
IR
5826
 5827 /* Support only unary, binary or ternary operations. */
5828 op_type = TREE_CODE_LENGTH (code);
16949072 5829 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5830 {
73fbfcad 5831 if (dump_enabled_p ())
78c60e3d 5832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5833 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5834 op_type);
ebfd146a
IR
5835 return false;
5836 }
5837
b690cc0f
RG
5838 scalar_dest = gimple_assign_lhs (stmt);
5839 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5840
7b7b1813
RG
5841 /* Most operations cannot handle bit-precision types without extra
5842 truncations. */
045c1278 5843 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5844 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
 5845 /* Exceptions are bitwise binary operations. */
5846 && code != BIT_IOR_EXPR
5847 && code != BIT_XOR_EXPR
5848 && code != BIT_AND_EXPR)
5849 {
73fbfcad 5850 if (dump_enabled_p ())
78c60e3d 5851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5852 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5853 return false;
5854 }
5855
ebfd146a 5856 op0 = gimple_assign_rhs1 (stmt);
894dd753 5857 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
ebfd146a 5858 {
73fbfcad 5859 if (dump_enabled_p ())
78c60e3d 5860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5861 "use not simple.\n");
ebfd146a
IR
5862 return false;
5863 }
b690cc0f
RG
5864 /* If op0 is an external or constant def use a vector type with
5865 the same size as the output vector type. */
5866 if (!vectype)
b036c6c5
IE
5867 {
 5868 /* For a boolean type we cannot determine the vectype from an
 5869 invariant value (we don't know whether it is a vector
 5870 of booleans or a vector of integers). We use the output
 5871 vectype because operations on booleans don't change the
 5872 type. */
2568d8a1 5873 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5874 {
2568d8a1 5875 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5876 {
5877 if (dump_enabled_p ())
5878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5879 "not supported operation on bool value.\n");
5880 return false;
5881 }
5882 vectype = vectype_out;
5883 }
5884 else
5885 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5886 }
7d8930a0
IR
5887 if (vec_stmt)
5888 gcc_assert (vectype);
5889 if (!vectype)
5890 {
73fbfcad 5891 if (dump_enabled_p ())
7d8930a0 5892 {
78c60e3d
SS
5893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5894 "no vectype for scalar type ");
5895 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5896 TREE_TYPE (op0));
e645e942 5897 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5898 }
5899
5900 return false;
5901 }
b690cc0f
RG
5902
5903 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5904 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5905 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5906 return false;
ebfd146a 5907
16949072 5908 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5909 {
5910 op1 = gimple_assign_rhs2 (stmt);
894dd753 5911 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
ebfd146a 5912 {
73fbfcad 5913 if (dump_enabled_p ())
78c60e3d 5914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5915 "use not simple.\n");
ebfd146a
IR
5916 return false;
5917 }
5918 }
16949072
RG
5919 if (op_type == ternary_op)
5920 {
5921 op2 = gimple_assign_rhs3 (stmt);
894dd753 5922 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
16949072 5923 {
73fbfcad 5924 if (dump_enabled_p ())
78c60e3d 5925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5926 "use not simple.\n");
16949072
RG
5927 return false;
5928 }
5929 }
ebfd146a 5930
b690cc0f 5931 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5932 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5933 case of SLP. */
fce57248 5934 if (slp_node)
b690cc0f
RG
5935 ncopies = 1;
5936 else
e8f142e2 5937 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5938
5939 gcc_assert (ncopies >= 1);
5940
9dc3f7de 5941 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5942 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5943 || code == RROTATE_EXPR)
9dc3f7de 5944 return false;
ebfd146a 5945
ebfd146a 5946 /* Supportable by target? */
00f07b86
RH
5947
5948 vec_mode = TYPE_MODE (vectype);
5949 if (code == MULT_HIGHPART_EXPR)
523ba738 5950 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5951 else
5952 {
5953 optab = optab_for_tree_code (code, vectype, optab_default);
5954 if (!optab)
5deb57cb 5955 {
73fbfcad 5956 if (dump_enabled_p ())
78c60e3d 5957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5958 "no optab.\n");
00f07b86 5959 return false;
5deb57cb 5960 }
523ba738
RS
5961 target_support_p = (optab_handler (optab, vec_mode)
5962 != CODE_FOR_nothing);
5deb57cb
JJ
5963 }
5964
523ba738 5965 if (!target_support_p)
ebfd146a 5966 {
73fbfcad 5967 if (dump_enabled_p ())
78c60e3d 5968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5969 "op not supported by target.\n");
ebfd146a 5970 /* Check only during analysis. */
cf098191 5971 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5972 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5973 return false;
73fbfcad 5974 if (dump_enabled_p ())
e645e942
TJ
5975 dump_printf_loc (MSG_NOTE, vect_location,
5976 "proceeding using word mode.\n");
383d9c83
IR
5977 }
5978
4a00c761 5979 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5980 if (!VECTOR_MODE_P (vec_mode)
5981 && !vec_stmt
ca09abcb 5982 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5983 {
73fbfcad 5984 if (dump_enabled_p ())
78c60e3d 5985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5986 "not worthwhile without SIMD support.\n");
e34842c6 5987 return false;
7d8930a0 5988 }
ebfd146a 5989
ebfd146a
IR
5990 if (!vec_stmt) /* transformation not required. */
5991 {
4a00c761 5992 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
adac3a68 5993 DUMP_VECT_SCOPE ("vectorizable_operation");
68435eb2 5994 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5995 return true;
5996 }
5997
67b8dbac 5998 /* Transform. */
ebfd146a 5999
73fbfcad 6000 if (dump_enabled_p ())
78c60e3d 6001 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6002 "transform binary/unary operation.\n");
383d9c83 6003
0eb952ea
JJ
6004 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6005 vectors with unsigned elements, but the result is signed. So, we
 6006 need to compute the MINUS_EXPR into a vectype temporary and
6007 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6008 tree vec_cvt_dest = NULL_TREE;
6009 if (orig_code == POINTER_DIFF_EXPR)
7b76867b
RB
6010 {
6011 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6012 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6013 }
6014 /* Handle def. */
6015 else
6016 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
0eb952ea 6017
ebfd146a
IR
6018 /* In case the vectorization factor (VF) is bigger than the number
6019 of elements that we can fit in a vectype (nunits), we have to generate
 6020 more than one vector stmt, i.e., we need to "unroll" the
4a00c761
JJ
6021 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6022 from one copy of the vector stmt to the next, in the field
6023 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6024 stages to find the correct vector defs to be used when vectorizing
6025 stmts that use the defs of the current stmt. The example below
6026 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6027 we need to create 4 vectorized stmts):
6028
6029 before vectorization:
6030 RELATED_STMT VEC_STMT
6031 S1: x = memref - -
6032 S2: z = x + 1 - -
6033
6034 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6035 there):
6036 RELATED_STMT VEC_STMT
6037 VS1_0: vx0 = memref0 VS1_1 -
6038 VS1_1: vx1 = memref1 VS1_2 -
6039 VS1_2: vx2 = memref2 VS1_3 -
6040 VS1_3: vx3 = memref3 - -
6041 S1: x = load - VS1_0
6042 S2: z = x + 1 - -
6043
6044 step2: vectorize stmt S2 (done here):
6045 To vectorize stmt S2 we first need to find the relevant vector
6046 def for the first operand 'x'. This is, as usual, obtained from
6047 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6048 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6049 relevant vector def 'vx0'. Having found 'vx0' we can generate
6050 the vector stmt VS2_0, and as usual, record it in the
6051 STMT_VINFO_VEC_STMT of stmt S2.
6052 When creating the second copy (VS2_1), we obtain the relevant vector
6053 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6054 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6055 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6056 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6057 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6058 chain of stmts and pointers:
6059 RELATED_STMT VEC_STMT
6060 VS1_0: vx0 = memref0 VS1_1 -
6061 VS1_1: vx1 = memref1 VS1_2 -
6062 VS1_2: vx2 = memref2 VS1_3 -
6063 VS1_3: vx3 = memref3 - -
6064 S1: x = load - VS1_0
6065 VS2_0: vz0 = vx0 + v1 VS2_1 -
6066 VS2_1: vz1 = vx1 + v1 VS2_2 -
6067 VS2_2: vz2 = vx2 + v1 VS2_3 -
6068 VS2_3: vz3 = vx3 + v1 - -
6069 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6070
6071 prev_stmt_info = NULL;
6072 for (j = 0; j < ncopies; j++)
6073 {
6074 /* Handle uses. */
6075 if (j == 0)
4a00c761 6076 {
d6476f90 6077 if (op_type == binary_op)
4a00c761 6078 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 6079 slp_node);
d6476f90
RB
6080 else if (op_type == ternary_op)
6081 {
6082 if (slp_node)
6083 {
6084 auto_vec<tree> ops(3);
6085 ops.quick_push (op0);
6086 ops.quick_push (op1);
6087 ops.quick_push (op2);
6088 auto_vec<vec<tree> > vec_defs(3);
6089 vect_get_slp_defs (ops, slp_node, &vec_defs);
6090 vec_oprnds0 = vec_defs[0];
6091 vec_oprnds1 = vec_defs[1];
6092 vec_oprnds2 = vec_defs[2];
6093 }
6094 else
6095 {
6096 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6097 NULL);
6098 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6099 NULL);
6100 }
6101 }
4a00c761
JJ
6102 else
6103 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 6104 slp_node);
4a00c761 6105 }
ebfd146a 6106 else
4a00c761
JJ
6107 {
6108 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6109 if (op_type == ternary_op)
6110 {
9771b263
DN
6111 tree vec_oprnd = vec_oprnds2.pop ();
6112 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6113 vec_oprnd));
4a00c761
JJ
6114 }
6115 }
6116
6117 /* Arguments are ready. Create the new vector stmt. */
e1bd7296 6118 stmt_vec_info new_stmt_info = NULL;
9771b263 6119 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6120 {
4a00c761 6121 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6122 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6123 vop2 = ((op_type == ternary_op)
9771b263 6124 ? vec_oprnds2[i] : NULL_TREE);
e1bd7296
RS
6125 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6126 vop0, vop1, vop2);
4a00c761
JJ
6127 new_temp = make_ssa_name (vec_dest, new_stmt);
6128 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 6129 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
6130 if (vec_cvt_dest)
6131 {
6132 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
e1bd7296
RS
6133 gassign *new_stmt
6134 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6135 new_temp);
0eb952ea
JJ
6136 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6137 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296
RS
6138 new_stmt_info
6139 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea 6140 }
4a00c761 6141 if (slp_node)
e1bd7296 6142 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
ebfd146a
IR
6143 }
6144
4a00c761
JJ
6145 if (slp_node)
6146 continue;
6147
6148 if (j == 0)
e1bd7296 6149 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4a00c761 6150 else
e1bd7296
RS
6151 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6152 prev_stmt_info = new_stmt_info;
ebfd146a
IR
6153 }
6154
9771b263
DN
6155 vec_oprnds0.release ();
6156 vec_oprnds1.release ();
6157 vec_oprnds2.release ();
ebfd146a 6158
ebfd146a
IR
6159 return true;
6160}
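/* Editorial example, not part of tree-vect-stmts.c: two statements that
   vectorizable_operation handles.  The addition is an ordinary PLUS_EXPR.
   The pointer subtraction is a POINTER_DIFF_EXPR: as the comment in the
   transform phase above explains, it is computed as a MINUS_EXPR on
   vectors with unsigned elements and then VIEW_CONVERT_EXPRed to the
   signed result type.  Function names are illustrative only.  */

#include <stddef.h>

void
add_ints (int *restrict dst, const int *restrict a,
          const int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = a[i] + b[i];
}

void
pointer_dists (ptrdiff_t *restrict dst, char *const *restrict p,
               char *const *restrict q, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = p[i] - q[i];
}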
6161
f702e7d4 6162/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
6163
6164static void
f702e7d4 6165ensure_base_align (struct data_reference *dr)
c716e67f 6166{
ca823c85 6167 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
c716e67f
XDL
6168 return;
6169
52639a61 6170 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 6171 {
52639a61 6172 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 6173
f702e7d4
RS
6174 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6175
428f0c67 6176 if (decl_in_symtab_p (base_decl))
f702e7d4 6177 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6178 else
6179 {
f702e7d4 6180 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6181 DECL_USER_ALIGN (base_decl) = 1;
6182 }
52639a61 6183 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6184 }
6185}
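/* Editorial example, not part of tree-vect-stmts.c: the situation the
   helper above addresses.  If the loop below is vectorized and the target
   prefers vector-aligned accesses (say 16 or 32 bytes) while the
   declaration of DATA only guarantees the alignment of float, the base
   declaration's alignment is raised, via SET_DECL_ALIGN or the symtab
   increase_alignment path shown above, so aligned vector loads and stores
   can be used.  The array and function names are illustrative only.  */

static float data[1024];

void
scale_data (int n)
{
  for (int i = 0; i < n; i++)
    data[i] *= 2.0f;
}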
6186
ebfd146a 6187
44fc7854
BE
6188/* Function get_group_alias_ptr_type.
6189
6190 Return the alias type for the group starting at FIRST_STMT. */
6191
6192static tree
6193get_group_alias_ptr_type (gimple *first_stmt)
6194{
6195 struct data_reference *first_dr, *next_dr;
44fc7854
BE
6196
6197 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
bffb8014
RS
6198 stmt_vec_info next_stmt_info
6199 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6200 while (next_stmt_info)
44fc7854 6201 {
bffb8014 6202 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
44fc7854
BE
6203 if (get_alias_set (DR_REF (first_dr))
6204 != get_alias_set (DR_REF (next_dr)))
6205 {
6206 if (dump_enabled_p ())
6207 dump_printf_loc (MSG_NOTE, vect_location,
6208 "conflicting alias set types.\n");
6209 return ptr_type_node;
6210 }
bffb8014 6211 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
44fc7854
BE
6212 }
6213 return reference_alias_ptr_type (DR_REF (first_dr));
6214}
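/* For example (illustrative only): if one interleaving group stores to both
   an int field and a float field of the same structure, as in

     p->i = ...;
     p->f = ...;

   the two members' alias sets conflict and the function above conservatively
   returns ptr_type_node; otherwise it returns the alias pointer type of the
   first member's reference.  */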
6215
6216
ebfd146a
IR
6217/* Function vectorizable_store.
6218
b8698a0f
L
6219 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6220 can be vectorized.
6221 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6222 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6223 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6224
6225static bool
1eede195
RS
6226vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi,
6227 stmt_vec_info *vec_stmt, slp_tree slp_node,
6228 stmt_vector_for_cost *cost_vec)
ebfd146a 6229{
ebfd146a
IR
6230 tree data_ref;
6231 tree op;
6232 tree vec_oprnd = NULL_TREE;
6233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6234 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6235 tree elem_type;
ebfd146a 6236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6237 struct loop *loop = NULL;
ef4bddc2 6238 machine_mode vec_mode;
ebfd146a
IR
6239 tree dummy;
6240 enum dr_alignment_support alignment_support_scheme;
929b4411
RS
6241 enum vect_def_type rhs_dt = vect_unknown_def_type;
6242 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6243 stmt_vec_info prev_stmt_info = NULL;
6244 tree dataref_ptr = NULL_TREE;
74bf76ed 6245 tree dataref_offset = NULL_TREE;
355fe088 6246 gimple *ptr_incr = NULL;
ebfd146a
IR
6247 int ncopies;
6248 int j;
bffb8014 6249 stmt_vec_info first_stmt_info;
2de001ee 6250 bool grouped_store;
ebfd146a 6251 unsigned int group_size, i;
6e1aa848
DN
6252 vec<tree> oprnds = vNULL;
6253 vec<tree> result_chain = vNULL;
ebfd146a 6254 bool inv_p;
09dfa495 6255 tree offset = NULL_TREE;
6e1aa848 6256 vec<tree> vec_oprnds = vNULL;
ebfd146a 6257 bool slp = (slp_node != NULL);
ebfd146a 6258 unsigned int vec_num;
a70d6342 6259 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6260 vec_info *vinfo = stmt_info->vinfo;
272c6793 6261 tree aggr_type;
134c85ca 6262 gather_scatter_info gs_info;
d9f21f6a 6263 poly_uint64 vf;
2de001ee 6264 vec_load_store_type vls_type;
44fc7854 6265 tree ref_type;
a70d6342 6266
a70d6342 6267 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6268 return false;
6269
66c16fd9
RB
6270 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6271 && ! vec_stmt)
ebfd146a
IR
6272 return false;
6273
6274 /* Is vectorizable store? */
6275
c3a8f964
RS
6276 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6277 if (is_gimple_assign (stmt))
6278 {
6279 tree scalar_dest = gimple_assign_lhs (stmt);
6280 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6281 && is_pattern_stmt_p (stmt_info))
6282 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6283 if (TREE_CODE (scalar_dest) != ARRAY_REF
6284 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6285 && TREE_CODE (scalar_dest) != INDIRECT_REF
6286 && TREE_CODE (scalar_dest) != COMPONENT_REF
6287 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6288 && TREE_CODE (scalar_dest) != REALPART_EXPR
6289 && TREE_CODE (scalar_dest) != MEM_REF)
6290 return false;
6291 }
6292 else
6293 {
6294 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6295 if (!call || !gimple_call_internal_p (call))
6296 return false;
6297
6298 internal_fn ifn = gimple_call_internal_fn (call);
6299 if (!internal_store_fn_p (ifn))
c3a8f964 6300 return false;
ebfd146a 6301
c3a8f964
RS
6302 if (slp_node != NULL)
6303 {
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "SLP of masked stores not supported.\n");
6307 return false;
6308 }
6309
f307441a
RS
6310 int mask_index = internal_fn_mask_index (ifn);
6311 if (mask_index >= 0)
6312 {
6313 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6314 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6315 &mask_vectype))
f307441a
RS
6316 return false;
6317 }
c3a8f964
RS
6318 }
6319
6320 op = vect_get_store_rhs (stmt);
ebfd146a 6321
fce57248
RS
6322 /* Cannot have hybrid store SLP -- that would mean storing to the
6323 same location twice. */
6324 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6325
f4d09712 6326 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6327 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6328
6329 if (loop_vinfo)
b17dc4d4
RB
6330 {
6331 loop = LOOP_VINFO_LOOP (loop_vinfo);
6332 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6333 }
6334 else
6335 vf = 1;
465c8c19
JJ
6336
6337 /* Multiple types in SLP are handled by creating the appropriate number of
6338 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6339 case of SLP. */
fce57248 6340 if (slp)
465c8c19
JJ
6341 ncopies = 1;
6342 else
e8f142e2 6343 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6344
6345 gcc_assert (ncopies >= 1);
6346
6347 /* FORNOW. This restriction should be relaxed. */
6348 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6349 {
6350 if (dump_enabled_p ())
6351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6352 "multiple types in nested loop.\n");
6353 return false;
6354 }
6355
929b4411 6356 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6357 return false;
6358
272c6793 6359 elem_type = TREE_TYPE (vectype);
ebfd146a 6360 vec_mode = TYPE_MODE (vectype);
7b7b1813 6361
ebfd146a
IR
6362 if (!STMT_VINFO_DATA_REF (stmt_info))
6363 return false;
6364
2de001ee 6365 vect_memory_access_type memory_access_type;
7e11fc7f 6366 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6367 &memory_access_type, &gs_info))
6368 return false;
3bab6342 6369
c3a8f964
RS
6370 if (mask)
6371 {
7e11fc7f
RS
6372 if (memory_access_type == VMAT_CONTIGUOUS)
6373 {
6374 if (!VECTOR_MODE_P (vec_mode)
6375 || !can_vec_mask_load_store_p (vec_mode,
6376 TYPE_MODE (mask_vectype), false))
6377 return false;
6378 }
f307441a
RS
6379 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6380 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6381 {
6382 if (dump_enabled_p ())
6383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6384 "unsupported access type for masked store.\n");
6385 return false;
6386 }
c3a8f964
RS
6387 }
6388 else
6389 {
6390 /* FORNOW. In some cases we can vectorize even if the data type is not
6391 supported (e.g. array initialization with 0). */
6392 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6393 return false;
6394 }
6395
f307441a 6396 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6397 && memory_access_type != VMAT_GATHER_SCATTER
6398 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6399 if (grouped_store)
6400 {
bffb8014
RS
6401 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6402 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6403 group_size = DR_GROUP_SIZE (first_stmt_info);
7cfb4d93
RS
6404 }
6405 else
6406 {
bffb8014 6407 first_stmt_info = stmt_info;
7cfb4d93
RS
6408 first_dr = dr;
6409 group_size = vec_num = 1;
6410 }
6411
ebfd146a
IR
6412 if (!vec_stmt) /* transformation not required. */
6413 {
2de001ee 6414 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6415
6416 if (loop_vinfo
6417 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6418 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6419 memory_access_type, &gs_info);
7cfb4d93 6420
ebfd146a 6421 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6422 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6423 vls_type, slp_node, cost_vec);
ebfd146a
IR
6424 return true;
6425 }
2de001ee 6426 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6427
67b8dbac 6428 /* Transform. */
ebfd146a 6429
f702e7d4 6430 ensure_base_align (dr);
c716e67f 6431
f307441a 6432 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6433 {
c3a8f964 6434 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6435 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6436 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6437 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6438 edge pe = loop_preheader_edge (loop);
6439 gimple_seq seq;
6440 basic_block new_bb;
6441 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6442 poly_uint64 scatter_off_nunits
6443 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6444
4d694b27 6445 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6446 modifier = NONE;
4d694b27 6447 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6448 {
3bab6342
AT
6449 modifier = WIDEN;
6450
4d694b27
RS
6451 /* Currently gathers and scatters are only supported for
6452 fixed-length vectors. */
6453 unsigned int count = scatter_off_nunits.to_constant ();
6454 vec_perm_builder sel (count, count, 1);
6455 for (i = 0; i < (unsigned int) count; ++i)
6456 sel.quick_push (i | (count / 2));
3bab6342 6457
4d694b27 6458 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6459 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6460 indices);
3bab6342
AT
6461 gcc_assert (perm_mask != NULL_TREE);
6462 }
4d694b27 6463 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6464 {
3bab6342
AT
6465 modifier = NARROW;
6466
4d694b27
RS
6467 /* Currently gathers and scatters are only supported for
6468 fixed-length vectors. */
6469 unsigned int count = nunits.to_constant ();
6470 vec_perm_builder sel (count, count, 1);
6471 for (i = 0; i < (unsigned int) count; ++i)
6472 sel.quick_push (i | (count / 2));
3bab6342 6473
4d694b27 6474 vec_perm_indices indices (sel, 2, count);
e3342de4 6475 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6476 gcc_assert (perm_mask != NULL_TREE);
6477 ncopies *= 2;
6478 }
6479 else
6480 gcc_unreachable ();
6481
134c85ca 6482 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6483 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6484 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6485 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6486 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6487 scaletype = TREE_VALUE (arglist);
6488
6489 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6490 && TREE_CODE (rettype) == VOID_TYPE);
6491
134c85ca 6492 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6493 if (!is_gimple_min_invariant (ptr))
6494 {
6495 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6496 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6497 gcc_assert (!new_bb);
6498 }
6499
6500 /* Currently we support only unconditional scatter stores,
6501 so mask should be all ones. */
6502 mask = build_int_cst (masktype, -1);
6503 mask = vect_init_vector (stmt, mask, masktype, NULL);
6504
134c85ca 6505 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6506
6507 prev_stmt_info = NULL;
6508 for (j = 0; j < ncopies; ++j)
6509 {
6510 if (j == 0)
6511 {
6512 src = vec_oprnd1
c3a8f964 6513 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6514 op = vec_oprnd0
134c85ca 6515 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6516 }
6517 else if (modifier != NONE && (j & 1))
6518 {
6519 if (modifier == WIDEN)
6520 {
6521 src = vec_oprnd1
929b4411 6522 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6523 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6524 stmt, gsi);
6525 }
6526 else if (modifier == NARROW)
6527 {
6528 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6529 stmt, gsi);
6530 op = vec_oprnd0
134c85ca
RS
6531 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6532 vec_oprnd0);
3bab6342
AT
6533 }
6534 else
6535 gcc_unreachable ();
6536 }
6537 else
6538 {
6539 src = vec_oprnd1
929b4411 6540 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6541 op = vec_oprnd0
134c85ca
RS
6542 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6543 vec_oprnd0);
3bab6342
AT
6544 }
6545
6546 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6547 {
928686b1
RS
6548 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6549 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6550 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342 6551 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
e1bd7296
RS
6552 gassign *new_stmt
6553 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
3bab6342
AT
6554 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6555 src = var;
6556 }
6557
6558 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6559 {
928686b1
RS
6560 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6561 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6562 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342 6563 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
e1bd7296
RS
6564 gassign *new_stmt
6565 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3bab6342
AT
6566 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6567 op = var;
6568 }
6569
e1bd7296 6570 gcall *new_stmt
134c85ca 6571 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
e1bd7296
RS
6572 stmt_vec_info new_stmt_info
6573 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3bab6342 6574
dbe1b846 6575 if (prev_stmt_info == NULL_STMT_VEC_INFO)
e1bd7296 6576 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3bab6342 6577 else
e1bd7296
RS
6578 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6579 prev_stmt_info = new_stmt_info;
3bab6342
AT
6580 }
6581 return true;
6582 }
6583
f307441a 6584 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
bffb8014 6585 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
ebfd146a 6586
f307441a
RS
6587 if (grouped_store)
6588 {
ebfd146a 6589 /* FORNOW */
a70d6342 6590 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6591
6592 /* We vectorize all the stmts of the interleaving group when we
6593 reach the last stmt in the group. */
bffb8014
RS
6594 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6595 < DR_GROUP_SIZE (first_stmt_info)
ebfd146a
IR
6596 && !slp)
6597 {
6598 *vec_stmt = NULL;
6599 return true;
6600 }
6601
6602 if (slp)
4b5caab7 6603 {
0d0293ac 6604 grouped_store = false;
4b5caab7
IR
6605 /* VEC_NUM is the number of vect stmts to be created for this
6606 group. */
6607 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
bffb8014
RS
6608 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6609 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6610 == first_stmt_info);
6611 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6612 op = vect_get_store_rhs (first_stmt_info);
4b5caab7 6613 }
ebfd146a 6614 else
4b5caab7
IR
6615 /* VEC_NUM is the number of vect stmts to be created for this
6616 group. */
ebfd146a 6617 vec_num = group_size;
44fc7854 6618
bffb8014 6619 ref_type = get_group_alias_ptr_type (first_stmt_info);
ebfd146a 6620 }
b8698a0f 6621 else
7cfb4d93 6622 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6623
73fbfcad 6624 if (dump_enabled_p ())
78c60e3d 6625 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6626 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6627
2de001ee
RS
6628 if (memory_access_type == VMAT_ELEMENTWISE
6629 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6630 {
6631 gimple_stmt_iterator incr_gsi;
6632 bool insert_after;
355fe088 6633 gimple *incr;
f2e2a985
MM
6634 tree offvar;
6635 tree ivstep;
6636 tree running_off;
f2e2a985
MM
6637 tree stride_base, stride_step, alias_off;
6638 tree vec_oprnd;
f502d50e 6639 unsigned int g;
4d694b27
RS
6640 /* Checked by get_load_store_type. */
6641 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6642
7cfb4d93 6643 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6644 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6645
6646 stride_base
6647 = fold_build_pointer_plus
b210f45f 6648 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6649 size_binop (PLUS_EXPR,
b210f45f 6650 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6651 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6652 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6653
6654 /* For a store with loop-invariant (but other than power-of-2)
6655 stride (i.e. not a grouped access) like so:
6656
6657 for (i = 0; i < n; i += stride)
6658 array[i] = ...;
6659
6660 we generate a new induction variable and new stores from
6661 the components of the (vectorized) rhs:
6662
6663 for (j = 0; ; j += VF*stride)
6664 vectemp = ...;
6665 tmp1 = vectemp[0];
6666 array[j] = tmp1;
6667 tmp2 = vectemp[1];
6668 array[j + stride] = tmp2;
6669 ...
6670 */
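	 /* A concrete instance of the sketch above, purely for illustration
	    (assuming a 4-lane vector and stride 2; this is not literally what
	    the code below emits):

	      for (j = 0; ; j += 4 * 2)
		{
		  vectemp = ...;
		  array[j]         = vectemp[0];
		  array[j + 2]     = vectemp[1];
		  array[j + 2 * 2] = vectemp[2];
		  array[j + 3 * 2] = vectemp[3];
		}  */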
6671
4d694b27 6672 unsigned nstores = const_nunits;
b17dc4d4 6673 unsigned lnel = 1;
cee62fee 6674 tree ltype = elem_type;
04199738 6675 tree lvectype = vectype;
cee62fee
MM
6676 if (slp)
6677 {
4d694b27
RS
6678 if (group_size < const_nunits
6679 && const_nunits % group_size == 0)
b17dc4d4 6680 {
4d694b27 6681 nstores = const_nunits / group_size;
b17dc4d4
RB
6682 lnel = group_size;
6683 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6684 lvectype = vectype;
6685
6686 /* First check if vec_extract optab doesn't support extraction
6687 of vector elts directly. */
b397965c 6688 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6689 machine_mode vmode;
6690 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6691 || !VECTOR_MODE_P (vmode)
414fef4e 6692 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6693 || (convert_optab_handler (vec_extract_optab,
6694 TYPE_MODE (vectype), vmode)
6695 == CODE_FOR_nothing))
6696 {
6697 /* Try to avoid emitting an extract of vector elements
6698 by performing the extracts using an integer type of the
6699 same size, extracting from a vector of those and then
6700 re-interpreting it as the original vector type if
6701 supported. */
6702 unsigned lsize
6703 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6704 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6705 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6706 /* If we can't construct such a vector fall back to
6707 element extracts from the original vector type and
6708 element size stores. */
4d694b27 6709 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6710 && VECTOR_MODE_P (vmode)
414fef4e 6711 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6712 && (convert_optab_handler (vec_extract_optab,
6713 vmode, elmode)
6714 != CODE_FOR_nothing))
6715 {
4d694b27 6716 nstores = lnunits;
04199738
RB
6717 lnel = group_size;
6718 ltype = build_nonstandard_integer_type (lsize, 1);
6719 lvectype = build_vector_type (ltype, nstores);
6720 }
6721 /* Else fall back to vector extraction anyway.
6722 Fewer stores are more important than avoiding spilling
6723 of the vector we extract from. Compared to the
6724 construction case in vectorizable_load no store-forwarding
6725 issue exists here for reasonable archs. */
6726 }
b17dc4d4 6727 }
4d694b27
RS
6728 else if (group_size >= const_nunits
6729 && group_size % const_nunits == 0)
b17dc4d4
RB
6730 {
6731 nstores = 1;
4d694b27 6732 lnel = const_nunits;
b17dc4d4 6733 ltype = vectype;
04199738 6734 lvectype = vectype;
b17dc4d4 6735 }
cee62fee
MM
6736 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6737 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6738 }
6739
f2e2a985
MM
6740 ivstep = stride_step;
6741 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6742 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6743
6744 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6745
b210f45f
RB
6746 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6747 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6748 create_iv (stride_base, ivstep, NULL,
6749 loop, &incr_gsi, insert_after,
6750 &offvar, NULL);
6751 incr = gsi_stmt (incr_gsi);
4fbeb363 6752 loop_vinfo->add_stmt (incr);
f2e2a985 6753
b210f45f 6754 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6755
6756 prev_stmt_info = NULL;
44fc7854 6757 alias_off = build_int_cst (ref_type, 0);
bffb8014 6758 stmt_vec_info next_stmt_info = first_stmt_info;
f502d50e 6759 for (g = 0; g < group_size; g++)
f2e2a985 6760 {
f502d50e
MM
6761 running_off = offvar;
6762 if (g)
f2e2a985 6763 {
f502d50e
MM
6764 tree size = TYPE_SIZE_UNIT (ltype);
6765 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6766 size);
f502d50e 6767 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6768 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6769 running_off, pos);
f2e2a985 6770 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6771 running_off = newoff;
f502d50e 6772 }
b17dc4d4
RB
6773 unsigned int group_el = 0;
6774 unsigned HOST_WIDE_INT
6775 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6776 for (j = 0; j < ncopies; j++)
6777 {
c3a8f964 6778 /* We've set op and dt above, from vect_get_store_rhs,
bffb8014 6779 and first_stmt_info == stmt_info. */
f502d50e
MM
6780 if (j == 0)
6781 {
6782 if (slp)
6783 {
6784 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6785 slp_node);
f502d50e
MM
6786 vec_oprnd = vec_oprnds[0];
6787 }
6788 else
6789 {
bffb8014
RS
6790 op = vect_get_store_rhs (next_stmt_info);
6791 vec_oprnd = vect_get_vec_def_for_operand
6792 (op, next_stmt_info);
f502d50e
MM
6793 }
6794 }
f2e2a985 6795 else
f502d50e
MM
6796 {
6797 if (slp)
6798 vec_oprnd = vec_oprnds[j];
6799 else
c079cbac 6800 {
894dd753 6801 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411
RS
6802 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6803 vec_oprnd);
c079cbac 6804 }
f502d50e 6805 }
04199738
RB
6806 /* Pun the vector to extract from if necessary. */
6807 if (lvectype != vectype)
6808 {
6809 tree tem = make_ssa_name (lvectype);
6810 gimple *pun
6811 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6812 lvectype, vec_oprnd));
6813 vect_finish_stmt_generation (stmt, pun, gsi);
6814 vec_oprnd = tem;
6815 }
f502d50e
MM
6816 for (i = 0; i < nstores; i++)
6817 {
6818 tree newref, newoff;
355fe088 6819 gimple *incr, *assign;
f502d50e
MM
6820 tree size = TYPE_SIZE (ltype);
6821 /* Extract the i'th component. */
6822 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6823 bitsize_int (i), size);
6824 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6825 size, pos);
6826
6827 elem = force_gimple_operand_gsi (gsi, elem, true,
6828 NULL_TREE, true,
6829 GSI_SAME_STMT);
6830
b17dc4d4
RB
6831 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6832 group_el * elsz);
f502d50e 6833 newref = build2 (MEM_REF, ltype,
b17dc4d4 6834 running_off, this_off);
19986382 6835 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6836
6837 /* And store it to *running_off. */
6838 assign = gimple_build_assign (newref, elem);
e1bd7296
RS
6839 stmt_vec_info assign_info
6840 = vect_finish_stmt_generation (stmt, assign, gsi);
f502d50e 6841
b17dc4d4
RB
6842 group_el += lnel;
6843 if (! slp
6844 || group_el == group_size)
6845 {
6846 newoff = copy_ssa_name (running_off, NULL);
6847 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6848 running_off, stride_step);
6849 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6850
b17dc4d4
RB
6851 running_off = newoff;
6852 group_el = 0;
6853 }
225ce44b
RB
6854 if (g == group_size - 1
6855 && !slp)
f502d50e
MM
6856 {
6857 if (j == 0 && i == 0)
225ce44b 6858 STMT_VINFO_VEC_STMT (stmt_info)
e1bd7296 6859 = *vec_stmt = assign_info;
f502d50e 6860 else
e1bd7296
RS
6861 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6862 prev_stmt_info = assign_info;
f502d50e
MM
6863 }
6864 }
f2e2a985 6865 }
bffb8014 6866 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
b17dc4d4
RB
6867 if (slp)
6868 break;
f2e2a985 6869 }
778dd3b6
RB
6870
6871 vec_oprnds.release ();
f2e2a985
MM
6872 return true;
6873 }
6874
8c681247 6875 auto_vec<tree> dr_chain (group_size);
9771b263 6876 oprnds.create (group_size);
ebfd146a 6877
720f5239 6878 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6879 gcc_assert (alignment_support_scheme);
70088b95
RS
6880 vec_loop_masks *loop_masks
6881 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6882 ? &LOOP_VINFO_MASKS (loop_vinfo)
6883 : NULL);
272c6793 6884 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6885 realignment. vect_supportable_dr_alignment always returns either
6886 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6887 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6888 && !mask
70088b95 6889 && !loop_masks)
272c6793
RS
6890 || alignment_support_scheme == dr_aligned
6891 || alignment_support_scheme == dr_unaligned_supported);
6892
62da9e14
RS
6893 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6894 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6895 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6896
f307441a
RS
6897 tree bump;
6898 tree vec_offset = NULL_TREE;
6899 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6900 {
6901 aggr_type = NULL_TREE;
6902 bump = NULL_TREE;
6903 }
6904 else if (memory_access_type == VMAT_GATHER_SCATTER)
6905 {
6906 aggr_type = elem_type;
6907 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6908 &bump, &vec_offset);
6909 }
272c6793 6910 else
f307441a
RS
6911 {
6912 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6913 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6914 else
6915 aggr_type = vectype;
6916 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6917 }
ebfd146a 6918
c3a8f964
RS
6919 if (mask)
6920 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6921
ebfd146a
IR
6922 /* In case the vectorization factor (VF) is bigger than the number
6923 of elements that we can fit in a vectype (nunits), we have to generate
6924 more than one vector stmt - i.e. - we need to "unroll" the
b8698a0f 6925 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6926 vect_get_vec_def_for_copy_stmt. */
6927
0d0293ac 6928 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6929
6930 S1: &base + 2 = x2
6931 S2: &base = x0
6932 S3: &base + 1 = x1
6933 S4: &base + 3 = x3
6934
6935 We create vectorized stores starting from base address (the access of the
6936 first stmt in the chain (S2 in the above example), when the last store stmt
6937 of the chain (S4) is reached:
6938
6939 VS1: &base = vx2
6940 VS2: &base + vec_size*1 = vx0
6941 VS3: &base + vec_size*2 = vx1
6942 VS4: &base + vec_size*3 = vx3
6943
6944 Then permutation statements are generated:
6945
3fcc1b55
JJ
6946 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6947 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6948 ...
b8698a0f 6949
ebfd146a
IR
6950 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6951 (the order of the data-refs in the output of vect_permute_store_chain
6952 corresponds to the order of scalar stmts in the interleaving chain - see
6953 the documentation of vect_permute_store_chain()).
6954
6955 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6956 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6957 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6958 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6959 */
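 /* In the VS5/VS6 permutations above, selector element K picks element K of
    the concatenation of the two input vectors, so with 8-element vectors
    {0, 8, 1, 9, 2, 10, 3, 11} interleaves the low halves of the inputs and
    {4, 12, 5, 13, 6, 14, 7, 15} interleaves the high halves (shown for
    illustration only).  */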
6960
6961 prev_stmt_info = NULL;
c3a8f964 6962 tree vec_mask = NULL_TREE;
ebfd146a
IR
6963 for (j = 0; j < ncopies; j++)
6964 {
e1bd7296 6965 stmt_vec_info new_stmt_info;
ebfd146a
IR
6966 if (j == 0)
6967 {
6968 if (slp)
6969 {
6970 /* Get vectorized arguments for SLP_NODE. */
d092494c 6971 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6972 NULL, slp_node);
ebfd146a 6973
9771b263 6974 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6975 }
6976 else
6977 {
b8698a0f
L
6978 /* For interleaved stores we collect vectorized defs for all the
6979 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6980 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6981 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6982
2c53b149 6983 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6984 OPRNDS are of size 1. */
bffb8014 6985 stmt_vec_info next_stmt_info = first_stmt_info;
ebfd146a
IR
6986 for (i = 0; i < group_size; i++)
6987 {
b8698a0f 6988 /* Since gaps are not supported for interleaved stores,
2c53b149 6989 DR_GROUP_SIZE is the exact number of stmts in the chain.
bffb8014
RS
6990 Therefore, NEXT_STMT_INFO can't be NULL. In case
6991 that there is no interleaving, DR_GROUP_SIZE is 1,
6992 and only one iteration of the loop will be executed. */
6993 op = vect_get_store_rhs (next_stmt_info);
6994 vec_oprnd = vect_get_vec_def_for_operand
6995 (op, next_stmt_info);
9771b263
DN
6996 dr_chain.quick_push (vec_oprnd);
6997 oprnds.quick_push (vec_oprnd);
bffb8014 6998 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
ebfd146a 6999 }
c3a8f964
RS
7000 if (mask)
7001 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7002 mask_vectype);
ebfd146a
IR
7003 }
7004
7005 /* We should have caught mismatched types earlier. */
7006 gcc_assert (useless_type_conversion_p (vectype,
7007 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
7008 bool simd_lane_access_p
7009 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7010 if (simd_lane_access_p
7011 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7012 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7013 && integer_zerop (DR_OFFSET (first_dr))
7014 && integer_zerop (DR_INIT (first_dr))
7015 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7016 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
7017 {
7018 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7019 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7020 inv_p = false;
74bf76ed 7021 }
f307441a
RS
7022 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7023 {
7024 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
7025 &dataref_ptr, &vec_offset);
7026 inv_p = false;
7027 }
74bf76ed
JJ
7028 else
7029 dataref_ptr
bffb8014 7030 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
74bf76ed 7031 simd_lane_access_p ? loop : NULL,
09dfa495 7032 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
7033 simd_lane_access_p, &inv_p,
7034 NULL_TREE, bump);
a70d6342 7035 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 7036 }
b8698a0f 7037 else
ebfd146a 7038 {
b8698a0f
L
7039 /* For interleaved stores we created vectorized defs for all the
7040 defs stored in OPRNDS in the previous iteration (previous copy).
7041 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
7042 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7043 next copy.
2c53b149 7044 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
7045 OPRNDS are of size 1. */
7046 for (i = 0; i < group_size; i++)
7047 {
9771b263 7048 op = oprnds[i];
894dd753 7049 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411 7050 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
7051 dr_chain[i] = vec_oprnd;
7052 oprnds[i] = vec_oprnd;
ebfd146a 7053 }
c3a8f964 7054 if (mask)
929b4411 7055 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
7056 if (dataref_offset)
7057 dataref_offset
f307441a
RS
7058 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7059 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
7060 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7061 vec_offset);
74bf76ed
JJ
7062 else
7063 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 7064 bump);
ebfd146a
IR
7065 }
7066
2de001ee 7067 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7068 {
272c6793 7069 tree vec_array;
267d3070 7070
3ba4ff41 7071 /* Get an array into which we can store the individual vectors. */
272c6793 7072 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7073
7074 /* Invalidate the current contents of VEC_ARRAY. This should
7075 become an RTL clobber too, which prevents the vector registers
7076 from being upward-exposed. */
7077 vect_clobber_variable (stmt, gsi, vec_array);
7078
7079 /* Store the individual vectors into the array. */
272c6793 7080 for (i = 0; i < vec_num; i++)
c2d7ab2a 7081 {
9771b263 7082 vec_oprnd = dr_chain[i];
272c6793 7083 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 7084 }
b8698a0f 7085
7cfb4d93 7086 tree final_mask = NULL;
70088b95
RS
7087 if (loop_masks)
7088 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7089 vectype, j);
7cfb4d93
RS
7090 if (vec_mask)
7091 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7092 vec_mask, gsi);
7093
7e11fc7f 7094 gcall *call;
7cfb4d93 7095 if (final_mask)
7e11fc7f
RS
7096 {
7097 /* Emit:
7098 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7099 VEC_ARRAY). */
7100 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7101 tree alias_ptr = build_int_cst (ref_type, align);
7102 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7103 dataref_ptr, alias_ptr,
7cfb4d93 7104 final_mask, vec_array);
7e11fc7f
RS
7105 }
7106 else
7107 {
7108 /* Emit:
7109 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7110 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7111 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7112 vec_array);
7113 gimple_call_set_lhs (call, data_ref);
7114 }
a844293d 7115 gimple_call_set_nothrow (call, true);
e1bd7296 7116 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
3ba4ff41
RS
7117
7118 /* Record that VEC_ARRAY is now dead. */
7119 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
7120 }
7121 else
7122 {
e1bd7296 7123 new_stmt_info = NULL;
0d0293ac 7124 if (grouped_store)
272c6793 7125 {
b6b9227d
JJ
7126 if (j == 0)
7127 result_chain.create (group_size);
272c6793
RS
7128 /* Permute. */
7129 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7130 &result_chain);
7131 }
c2d7ab2a 7132
bffb8014 7133 stmt_vec_info next_stmt_info = first_stmt_info;
272c6793
RS
7134 for (i = 0; i < vec_num; i++)
7135 {
644ffefd 7136 unsigned align, misalign;
272c6793 7137
7cfb4d93 7138 tree final_mask = NULL_TREE;
70088b95
RS
7139 if (loop_masks)
7140 final_mask = vect_get_loop_mask (gsi, loop_masks,
7141 vec_num * ncopies,
7cfb4d93
RS
7142 vectype, vec_num * j + i);
7143 if (vec_mask)
7144 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7145 vec_mask, gsi);
7146
f307441a
RS
7147 if (memory_access_type == VMAT_GATHER_SCATTER)
7148 {
7149 tree scale = size_int (gs_info.scale);
7150 gcall *call;
70088b95 7151 if (loop_masks)
f307441a
RS
7152 call = gimple_build_call_internal
7153 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7154 scale, vec_oprnd, final_mask);
7155 else
7156 call = gimple_build_call_internal
7157 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7158 scale, vec_oprnd);
7159 gimple_call_set_nothrow (call, true);
e1bd7296
RS
7160 new_stmt_info
7161 = vect_finish_stmt_generation (stmt, call, gsi);
f307441a
RS
7162 break;
7163 }
7164
272c6793
RS
7165 if (i > 0)
7166 /* Bump the vector pointer. */
7167 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7168 stmt, bump);
272c6793
RS
7169
7170 if (slp)
9771b263 7171 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7172 else if (grouped_store)
7173 /* For grouped stores vectorized defs are interleaved in
272c6793 7174 vect_permute_store_chain(). */
9771b263 7175 vec_oprnd = result_chain[i];
272c6793 7176
f702e7d4 7177 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7178 if (aligned_access_p (first_dr))
644ffefd 7179 misalign = 0;
272c6793
RS
7180 else if (DR_MISALIGNMENT (first_dr) == -1)
7181 {
25f68d90 7182 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7183 misalign = 0;
272c6793
RS
7184 }
7185 else
c3a8f964 7186 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7187 if (dataref_offset == NULL_TREE
7188 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7189 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7190 misalign);
c2d7ab2a 7191
62da9e14 7192 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7193 {
7194 tree perm_mask = perm_mask_for_reverse (vectype);
7195 tree perm_dest
c3a8f964 7196 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7197 vectype);
b731b390 7198 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7199
7200 /* Generate the permute statement. */
355fe088 7201 gimple *perm_stmt
0d0e4a03
JJ
7202 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7203 vec_oprnd, perm_mask);
09dfa495
BM
7204 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7205
7206 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7207 vec_oprnd = new_temp;
7208 }
7209
272c6793 7210 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7211 if (final_mask)
c3a8f964
RS
7212 {
7213 align = least_bit_hwi (misalign | align);
7214 tree ptr = build_int_cst (ref_type, align);
7215 gcall *call
7216 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7217 dataref_ptr, ptr,
7cfb4d93 7218 final_mask, vec_oprnd);
c3a8f964 7219 gimple_call_set_nothrow (call, true);
e1bd7296
RS
7220 new_stmt_info
7221 = vect_finish_stmt_generation (stmt, call, gsi);
c3a8f964
RS
7222 }
7223 else
7224 {
7225 data_ref = fold_build2 (MEM_REF, vectype,
7226 dataref_ptr,
7227 dataref_offset
7228 ? dataref_offset
7229 : build_int_cst (ref_type, 0));
7230 if (aligned_access_p (first_dr))
7231 ;
7232 else if (DR_MISALIGNMENT (first_dr) == -1)
7233 TREE_TYPE (data_ref)
7234 = build_aligned_type (TREE_TYPE (data_ref),
7235 align * BITS_PER_UNIT);
7236 else
7237 TREE_TYPE (data_ref)
7238 = build_aligned_type (TREE_TYPE (data_ref),
7239 TYPE_ALIGN (elem_type));
19986382 7240 vect_copy_ref_info (data_ref, DR_REF (first_dr));
e1bd7296
RS
7241 gassign *new_stmt
7242 = gimple_build_assign (data_ref, vec_oprnd);
7243 new_stmt_info
7244 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
c3a8f964 7245 }
272c6793
RS
7246
7247 if (slp)
7248 continue;
7249
bffb8014
RS
7250 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7251 if (!next_stmt_info)
272c6793
RS
7252 break;
7253 }
ebfd146a 7254 }
1da0876c
RS
7255 if (!slp)
7256 {
7257 if (j == 0)
e1bd7296 7258 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
1da0876c 7259 else
e1bd7296
RS
7260 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7261 prev_stmt_info = new_stmt_info;
1da0876c 7262 }
ebfd146a
IR
7263 }
7264
9771b263
DN
7265 oprnds.release ();
7266 result_chain.release ();
7267 vec_oprnds.release ();
ebfd146a
IR
7268
7269 return true;
7270}
7271
557be5a8
AL
7272/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7273 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7274 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7275 vect_gen_perm_mask_checked. */
a1e53f3f 7276
3fcc1b55 7277tree
4aae3cb3 7278vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7279{
b00cb3bf 7280 tree mask_type;
a1e53f3f 7281
0ecc2b7d
RS
7282 poly_uint64 nunits = sel.length ();
7283 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7284
7285 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7286 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7287}
7288
7ac7e286 7289/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7290 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7291
7292tree
4aae3cb3 7293vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7294{
7ac7e286 7295 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7296 return vect_gen_perm_mask_any (vectype, sel);
7297}
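/* Typical use (a sketch only, assuming a fixed-length 4-lane VECTYPE):
   build the selector with vec_perm_builder, wrap it in vec_perm_indices and
   request the checked mask, e.g. to reverse the lanes:

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   Callers that have not already verified can_vec_perm_const_p should use
   vect_gen_perm_mask_any and do that check themselves.  */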
7298
aec7ae7d
JJ
7299/* Given a vector variable X and Y, that was generated for the scalar
7300 STMT, generate instructions to permute the vector elements of X and Y
7301 using permutation mask MASK_VEC, insert them at *GSI and return the
7302 permuted vector variable. */
a1e53f3f
L
7303
7304static tree
355fe088 7305permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7306 gimple_stmt_iterator *gsi)
a1e53f3f
L
7307{
7308 tree vectype = TREE_TYPE (x);
aec7ae7d 7309 tree perm_dest, data_ref;
355fe088 7310 gimple *perm_stmt;
a1e53f3f 7311
7ad429a4
RS
7312 tree scalar_dest = gimple_get_lhs (stmt);
7313 if (TREE_CODE (scalar_dest) == SSA_NAME)
7314 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7315 else
7316 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7317 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7318
7319 /* Generate the permute statement. */
0d0e4a03 7320 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7321 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7322
7323 return data_ref;
7324}
7325
6b916b36
RB
7326/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7327 inserting them on the loops preheader edge. Returns true if we
7328 were successful in doing so (and thus STMT can be moved then),
7329 otherwise returns false. */
7330
7331static bool
355fe088 7332hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7333{
7334 ssa_op_iter i;
7335 tree op;
7336 bool any = false;
7337
7338 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7339 {
355fe088 7340 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7341 if (!gimple_nop_p (def_stmt)
7342 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7343 {
7344 /* Make sure we don't need to recurse. While we could do
7345 so in simple cases, when there are more complex use webs
7346 we don't have an easy way to preserve stmt order to fulfil
7347 dependencies within them. */
7348 tree op2;
7349 ssa_op_iter i2;
d1417442
JJ
7350 if (gimple_code (def_stmt) == GIMPLE_PHI)
7351 return false;
6b916b36
RB
7352 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7353 {
355fe088 7354 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7355 if (!gimple_nop_p (def_stmt2)
7356 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7357 return false;
7358 }
7359 any = true;
7360 }
7361 }
7362
7363 if (!any)
7364 return true;
7365
7366 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7367 {
355fe088 7368 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7369 if (!gimple_nop_p (def_stmt)
7370 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7371 {
7372 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7373 gsi_remove (&gsi, false);
7374 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7375 }
7376 }
7377
7378 return true;
7379}
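/* Illustrative example: if STMT is a load whose address is computed by a
   statement that sits inside the loop but whose own operands are all defined
   outside of it, the function above moves that address computation to the
   preheader edge, after which STMT itself can be moved out of the loop as
   well.  */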
7380
ebfd146a
IR
7381/* vectorizable_load.
7382
b8698a0f
L
7383 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7384 can be vectorized.
7385 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
7386 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7387 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7388
7389static bool
1eede195
RS
7390vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi,
7391 stmt_vec_info *vec_stmt, slp_tree slp_node,
7392 slp_instance slp_node_instance,
68435eb2 7393 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7394{
7395 tree scalar_dest;
7396 tree vec_dest = NULL;
7397 tree data_ref = NULL;
7398 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7399 stmt_vec_info prev_stmt_info;
ebfd146a 7400 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7401 struct loop *loop = NULL;
ebfd146a 7402 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7403 bool nested_in_vect_loop = false;
c716e67f 7404 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7405 tree elem_type;
ebfd146a 7406 tree new_temp;
ef4bddc2 7407 machine_mode mode;
ebfd146a
IR
7408 tree dummy;
7409 enum dr_alignment_support alignment_support_scheme;
7410 tree dataref_ptr = NULL_TREE;
74bf76ed 7411 tree dataref_offset = NULL_TREE;
355fe088 7412 gimple *ptr_incr = NULL;
ebfd146a 7413 int ncopies;
4d694b27
RS
7414 int i, j;
7415 unsigned int group_size;
7416 poly_uint64 group_gap_adj;
ebfd146a
IR
7417 tree msq = NULL_TREE, lsq;
7418 tree offset = NULL_TREE;
356bbc4c 7419 tree byte_offset = NULL_TREE;
ebfd146a 7420 tree realignment_token = NULL_TREE;
538dd0b7 7421 gphi *phi = NULL;
6e1aa848 7422 vec<tree> dr_chain = vNULL;
0d0293ac 7423 bool grouped_load = false;
bffb8014 7424 stmt_vec_info first_stmt_info;
b9787581 7425 stmt_vec_info first_stmt_info_for_drptr = NULL;
ebfd146a
IR
7426 bool inv_p;
7427 bool compute_in_loop = false;
7428 struct loop *at_loop;
7429 int vec_num;
7430 bool slp = (slp_node != NULL);
7431 bool slp_perm = false;
a70d6342 7432 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7433 poly_uint64 vf;
272c6793 7434 tree aggr_type;
134c85ca 7435 gather_scatter_info gs_info;
310213d4 7436 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7437 tree ref_type;
929b4411 7438 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7439
465c8c19
JJ
7440 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7441 return false;
7442
66c16fd9
RB
7443 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7444 && ! vec_stmt)
465c8c19
JJ
7445 return false;
7446
c3a8f964
RS
7447 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7448 if (is_gimple_assign (stmt))
7449 {
7450 scalar_dest = gimple_assign_lhs (stmt);
7451 if (TREE_CODE (scalar_dest) != SSA_NAME)
7452 return false;
465c8c19 7453
c3a8f964
RS
7454 tree_code code = gimple_assign_rhs_code (stmt);
7455 if (code != ARRAY_REF
7456 && code != BIT_FIELD_REF
7457 && code != INDIRECT_REF
7458 && code != COMPONENT_REF
7459 && code != IMAGPART_EXPR
7460 && code != REALPART_EXPR
7461 && code != MEM_REF
7462 && TREE_CODE_CLASS (code) != tcc_declaration)
7463 return false;
7464 }
7465 else
7466 {
7467 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7468 if (!call || !gimple_call_internal_p (call))
7469 return false;
7470
7471 internal_fn ifn = gimple_call_internal_fn (call);
7472 if (!internal_load_fn_p (ifn))
c3a8f964 7473 return false;
465c8c19 7474
c3a8f964
RS
7475 scalar_dest = gimple_call_lhs (call);
7476 if (!scalar_dest)
7477 return false;
7478
7479 if (slp_node != NULL)
7480 {
7481 if (dump_enabled_p ())
7482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7483 "SLP of masked loads not supported.\n");
7484 return false;
7485 }
7486
bfaa08b7
RS
7487 int mask_index = internal_fn_mask_index (ifn);
7488 if (mask_index >= 0)
7489 {
7490 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7491 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7492 &mask_vectype))
bfaa08b7
RS
7493 return false;
7494 }
c3a8f964 7495 }
465c8c19
JJ
7496
7497 if (!STMT_VINFO_DATA_REF (stmt_info))
7498 return false;
7499
7500 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7501 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7502
a70d6342
IR
7503 if (loop_vinfo)
7504 {
7505 loop = LOOP_VINFO_LOOP (loop_vinfo);
7506 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7507 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7508 }
7509 else
3533e503 7510 vf = 1;
ebfd146a
IR
7511
7512 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7514 case of SLP. */
fce57248 7515 if (slp)
ebfd146a
IR
7516 ncopies = 1;
7517 else
e8f142e2 7518 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7519
7520 gcc_assert (ncopies >= 1);
7521
7522 /* FORNOW. This restriction should be relaxed. */
7523 if (nested_in_vect_loop && ncopies > 1)
7524 {
73fbfcad 7525 if (dump_enabled_p ())
78c60e3d 7526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7527 "multiple types in nested loop.\n");
ebfd146a
IR
7528 return false;
7529 }
7530
f2556b68
RB
7531 /* Invalidate assumptions made by dependence analysis when vectorization
7532 on the unrolled body effectively re-orders stmts. */
7533 if (ncopies > 1
7534 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7535 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7536 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7537 {
7538 if (dump_enabled_p ())
7539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7540 "cannot perform implicit CSE when unrolling "
7541 "with negative dependence distance\n");
7542 return false;
7543 }
7544
7b7b1813 7545 elem_type = TREE_TYPE (vectype);
947131ba 7546 mode = TYPE_MODE (vectype);
ebfd146a
IR
7547
7548 /* FORNOW. In some cases can vectorize even if data-type not supported
7549 (e.g. - data copies). */
947131ba 7550 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7551 {
73fbfcad 7552 if (dump_enabled_p ())
78c60e3d 7553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7554 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7555 return false;
7556 }
7557
ebfd146a 7558 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7559 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7560 {
0d0293ac 7561 grouped_load = true;
ebfd146a 7562 /* FORNOW */
2de001ee
RS
7563 gcc_assert (!nested_in_vect_loop);
7564 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7565
bffb8014
RS
7566 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7567 group_size = DR_GROUP_SIZE (first_stmt_info);
d5f035ea 7568
b1af7da6
RB
7569 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7570 slp_perm = true;
7571
f2556b68
RB
7572 /* Invalidate assumptions made by dependence analysis when vectorization
7573 on the unrolled body effectively re-orders stmts. */
7574 if (!PURE_SLP_STMT (stmt_info)
7575 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7576 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7577 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7578 {
7579 if (dump_enabled_p ())
7580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7581 "cannot perform implicit CSE when performing "
7582 "group loads with negative dependence distance\n");
7583 return false;
7584 }
96bb56b2
RB
7585
7586 /* Similarly when the stmt is a load that is both part of a SLP
7587 instance and a loop vectorized stmt via the same-dr mechanism
7588 we have to give up. */
2c53b149 7589 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2 7590 && (STMT_SLP_TYPE (stmt_info)
c26228d4 7591 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
96bb56b2
RB
7592 {
7593 if (dump_enabled_p ())
7594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7595 "conflicting SLP types for CSEd load\n");
7596 return false;
7597 }
ebfd146a 7598 }
7cfb4d93
RS
7599 else
7600 group_size = 1;
ebfd146a 7601
2de001ee 7602 vect_memory_access_type memory_access_type;
7e11fc7f 7603 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7604 &memory_access_type, &gs_info))
7605 return false;
a1e53f3f 7606
c3a8f964
RS
7607 if (mask)
7608 {
7609 if (memory_access_type == VMAT_CONTIGUOUS)
7610 {
7e11fc7f
RS
7611 machine_mode vec_mode = TYPE_MODE (vectype);
7612 if (!VECTOR_MODE_P (vec_mode)
7613 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7614 TYPE_MODE (mask_vectype), true))
7615 return false;
7616 }
bfaa08b7 7617 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7618 {
7619 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7620 tree masktype
7621 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7622 if (TREE_CODE (masktype) == INTEGER_TYPE)
7623 {
7624 if (dump_enabled_p ())
7625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7626 "masked gather with integer mask not"
7627 " supported.");
7628 return false;
7629 }
7630 }
bfaa08b7
RS
7631 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7632 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7633 {
7634 if (dump_enabled_p ())
7635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7636 "unsupported access type for masked load.\n");
7637 return false;
7638 }
7639 }
7640
ebfd146a
IR
7641 if (!vec_stmt) /* transformation not required. */
7642 {
2de001ee
RS
7643 if (!slp)
7644 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7645
7646 if (loop_vinfo
7647 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7648 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7649 memory_access_type, &gs_info);
7cfb4d93 7650
ebfd146a 7651 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7652 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7653 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7654 return true;
7655 }
7656
2de001ee
RS
7657 if (!slp)
7658 gcc_assert (memory_access_type
7659 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7660
73fbfcad 7661 if (dump_enabled_p ())
78c60e3d 7662 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7663 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7664
67b8dbac 7665 /* Transform. */
ebfd146a 7666
f702e7d4 7667 ensure_base_align (dr);
c716e67f 7668
bfaa08b7 7669 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7670 {
929b4411
RS
7671 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7672 mask_dt);
aec7ae7d
JJ
7673 return true;
7674 }
2de001ee
RS
7675
7676 if (memory_access_type == VMAT_ELEMENTWISE
7677 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7678 {
7679 gimple_stmt_iterator incr_gsi;
7680 bool insert_after;
355fe088 7681 gimple *incr;
7d75abc8 7682 tree offvar;
7d75abc8
MM
7683 tree ivstep;
7684 tree running_off;
9771b263 7685 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7686 tree stride_base, stride_step, alias_off;
4d694b27
RS
7687 /* Checked by get_load_store_type. */
7688 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7689 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7690
7cfb4d93 7691 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7692 gcc_assert (!nested_in_vect_loop);
7d75abc8 7693
b210f45f 7694 if (grouped_load)
44fc7854 7695 {
bffb8014
RS
7696 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7697 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
44fc7854 7698 }
ab313a8c 7699 else
44fc7854 7700 {
bffb8014 7701 first_stmt_info = stmt_info;
44fc7854 7702 first_dr = dr;
b210f45f
RB
7703 }
7704 if (slp && grouped_load)
7705 {
bffb8014
RS
7706 group_size = DR_GROUP_SIZE (first_stmt_info);
7707 ref_type = get_group_alias_ptr_type (first_stmt_info);
b210f45f
RB
7708 }
7709 else
7710 {
7711 if (grouped_load)
7712 cst_offset
7713 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
bffb8014
RS
7714 * vect_get_place_in_interleaving_chain (stmt,
7715 first_stmt_info));
44fc7854 7716 group_size = 1;
b210f45f 7717 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7718 }
ab313a8c 7719
14ac6aa2
RB
7720 stride_base
7721 = fold_build_pointer_plus
ab313a8c 7722 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7723 size_binop (PLUS_EXPR,
ab313a8c
RB
7724 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7725 convert_to_ptrofftype (DR_INIT (first_dr))));
7726 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7727
7728 /* For a load with loop-invariant (but other than power-of-2)
7729 stride (i.e. not a grouped access) like so:
7730
7731 for (i = 0; i < n; i += stride)
7732 ... = array[i];
7733
7734 we generate a new induction variable and new accesses to
7735 form a new vector (or vectors, depending on ncopies):
7736
7737 for (j = 0; ; j += VF*stride)
7738 tmp1 = array[j];
7739 tmp2 = array[j + stride];
7740 ...
7741 vectemp = {tmp1, tmp2, ...}
7742 */
7743
ab313a8c
RB
7744 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7745 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7746
7747 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7748
b210f45f
RB
7749 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7750 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7751 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7752 loop, &incr_gsi, insert_after,
7753 &offvar, NULL);
7754 incr = gsi_stmt (incr_gsi);
4fbeb363 7755 loop_vinfo->add_stmt (incr);
7d75abc8 7756
b210f45f 7757 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7758
7759 prev_stmt_info = NULL;
7760 running_off = offvar;
44fc7854 7761 alias_off = build_int_cst (ref_type, 0);
4d694b27 7762 int nloads = const_nunits;
e09b4c37 7763 int lnel = 1;
7b5fc413 7764 tree ltype = TREE_TYPE (vectype);
ea60dd34 7765 tree lvectype = vectype;
b266b968 7766 auto_vec<tree> dr_chain;
2de001ee 7767 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7768 {
4d694b27 7769 if (group_size < const_nunits)
e09b4c37 7770 {
ff03930a
JJ
7771 /* First check if vec_init optab supports construction from
7772 vector elts directly. */
b397965c 7773 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7774 machine_mode vmode;
7775 if (mode_for_vector (elmode, group_size).exists (&vmode)
7776 && VECTOR_MODE_P (vmode)
414fef4e 7777 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7778 && (convert_optab_handler (vec_init_optab,
7779 TYPE_MODE (vectype), vmode)
7780 != CODE_FOR_nothing))
ea60dd34 7781 {
4d694b27 7782 nloads = const_nunits / group_size;
ea60dd34 7783 lnel = group_size;
ff03930a
JJ
7784 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7785 }
7786 else
7787 {
7788 /* Otherwise avoid emitting a constructor of vector elements
7789 by performing the loads using an integer type of the same
7790 size, constructing a vector of those and then
7791 re-interpreting it as the original vector type.
7792 This avoids a huge runtime penalty due to the general
7793 inability to perform store forwarding from smaller stores
7794 to a larger load. */
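/* A sketch of that fallback (element type and sizes chosen only for
   illustration): for group_size = 2 and a V8HI vectype, lsize is 32 bits,
   so the loads are done as SImode integers, collected into a V4SI vector
   and then re-interpreted:

       tmp0 = *(int *) ptr;                (covers two HImode elements)
       tmp1 = *(int *) (ptr + step);
       tmp2 = *(int *) (ptr + 2 * step);
       tmp3 = *(int *) (ptr + 3 * step);
       ivec = {tmp0, tmp1, tmp2, tmp3};    (a V4SI constructor)
       vec  = VIEW_CONVERT_EXPR <V8HI> (ivec);  */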
7795 unsigned lsize
7796 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7797 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7798 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7799 /* If we can't construct such a vector fall back to
7800 element loads of the original vector type. */
4d694b27 7801 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7802 && VECTOR_MODE_P (vmode)
414fef4e 7803 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7804 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7805 != CODE_FOR_nothing))
7806 {
4d694b27 7807 nloads = lnunits;
ff03930a
JJ
7808 lnel = group_size;
7809 ltype = build_nonstandard_integer_type (lsize, 1);
7810 lvectype = build_vector_type (ltype, nloads);
7811 }
ea60dd34 7812 }
e09b4c37 7813 }
2de001ee 7814 else
e09b4c37 7815 {
ea60dd34 7816 nloads = 1;
4d694b27 7817 lnel = const_nunits;
e09b4c37 7818 ltype = vectype;
e09b4c37 7819 }
2de001ee
RS
7820 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7821 }
bb4e4747
BC
 7822 /* If the vectype is a single-element vector (vector(1) scalar_type),
 load it as a whole. */

7823 else if (nloads == 1)
7824 ltype = vectype;
7825
2de001ee
RS
7826 if (slp)
7827 {
66c16fd9
RB
7828 /* For SLP permutation support we need to load the whole group,
7829 not only the number of vector stmts the permutation result
7830 fits in. */
b266b968 7831 if (slp_perm)
66c16fd9 7832 {
d9f21f6a
RS
7833 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7834 variable VF. */
7835 unsigned int const_vf = vf.to_constant ();
4d694b27 7836 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7837 dr_chain.create (ncopies);
7838 }
7839 else
7840 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7841 }
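/* Example of the SLP-permutation count above (numbers assumed purely for
   illustration): with group_size = 3, VF = 8 and const_nunits = 4,
   ncopies = CEIL (3 * 8, 4) = 6 vectors are loaded so that the
   permutation has the whole group available.  */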
4d694b27 7842 unsigned int group_el = 0;
e09b4c37
RB
7843 unsigned HOST_WIDE_INT
7844 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7845 for (j = 0; j < ncopies; j++)
7846 {
7b5fc413 7847 if (nloads > 1)
e09b4c37 7848 vec_alloc (v, nloads);
e1bd7296 7849 stmt_vec_info new_stmt_info = NULL;
e09b4c37 7850 for (i = 0; i < nloads; i++)
7b5fc413 7851 {
e09b4c37 7852 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7853 group_el * elsz + cst_offset);
19986382
RB
7854 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7855 vect_copy_ref_info (data_ref, DR_REF (first_dr));
e1bd7296
RS
7856 gassign *new_stmt
7857 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7858 new_stmt_info
7859 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
e09b4c37
RB
7860 if (nloads > 1)
7861 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7862 gimple_assign_lhs (new_stmt));
7863
7864 group_el += lnel;
7865 if (! slp
7866 || group_el == group_size)
7b5fc413 7867 {
e09b4c37
RB
7868 tree newoff = copy_ssa_name (running_off);
7869 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7870 running_off, stride_step);
7b5fc413
RB
7871 vect_finish_stmt_generation (stmt, incr, gsi);
7872
7873 running_off = newoff;
e09b4c37 7874 group_el = 0;
7b5fc413 7875 }
7b5fc413 7876 }
e09b4c37 7877 if (nloads > 1)
7d75abc8 7878 {
ea60dd34
RB
7879 tree vec_inv = build_constructor (lvectype, v);
7880 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e1bd7296 7881 new_stmt_info = vinfo->lookup_def (new_temp);
ea60dd34
RB
7882 if (lvectype != vectype)
7883 {
e1bd7296
RS
7884 gassign *new_stmt
7885 = gimple_build_assign (make_ssa_name (vectype),
7886 VIEW_CONVERT_EXPR,
7887 build1 (VIEW_CONVERT_EXPR,
7888 vectype, new_temp));
7889 new_stmt_info
7890 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
ea60dd34 7891 }
7d75abc8
MM
7892 }
7893
7b5fc413 7894 if (slp)
b266b968 7895 {
b266b968 7896 if (slp_perm)
e1bd7296 7897 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
66c16fd9 7898 else
e1bd7296 7899 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
b266b968 7900 }
7d75abc8 7901 else
225ce44b
RB
7902 {
7903 if (j == 0)
e1bd7296 7904 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
225ce44b 7905 else
e1bd7296
RS
7906 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7907 prev_stmt_info = new_stmt_info;
225ce44b 7908 }
7d75abc8 7909 }
b266b968 7910 if (slp_perm)
29afecdf
RB
7911 {
7912 unsigned n_perms;
7913 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7914 slp_node_instance, false, &n_perms);
7915 }
7d75abc8
MM
7916 return true;
7917 }
aec7ae7d 7918
b5ec4de7
RS
7919 if (memory_access_type == VMAT_GATHER_SCATTER
7920 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7921 grouped_load = false;
7922
0d0293ac 7923 if (grouped_load)
ebfd146a 7924 {
bffb8014
RS
7925 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7926 group_size = DR_GROUP_SIZE (first_stmt_info);
4f0a0218 7927 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7928 without permutation. */
7929 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
bffb8014 7930 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4f0a0218
RB
7931 /* For BB vectorization always use the first stmt to base
7932 the data ref pointer on. */
7933 if (bb_vinfo)
b9787581 7934 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7935
ebfd146a 7936 /* Check if the chain of loads is already vectorized. */
bffb8014 7937 if (STMT_VINFO_VEC_STMT (first_stmt_info)
01d8bf07
RB
7938 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7939 ??? But we can only do so if there is exactly one
7940 as we have no way to get at the rest. Leave the CSE
7941 opportunity alone.
7942 ??? With the group load eventually participating
7943 in multiple different permutations (having multiple
7944 slp nodes which refer to the same group) the CSE
7945 is even wrong code. See PR56270. */
7946 && !slp)
ebfd146a
IR
7947 {
7948 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7949 return true;
7950 }
bffb8014 7951 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
9b999e8c 7952 group_gap_adj = 0;
ebfd146a
IR
7953
7954 /* VEC_NUM is the number of vect stmts to be created for this group. */
7955 if (slp)
7956 {
0d0293ac 7957 grouped_load = false;
91ff1504
RB
7958 /* For SLP permutation support we need to load the whole group,
7959 not only the number of vector stmts the permutation result
7960 fits in. */
7961 if (slp_perm)
b267968e 7962 {
d9f21f6a
RS
7963 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7964 variable VF. */
7965 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7966 unsigned int const_nunits = nunits.to_constant ();
7967 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7968 group_gap_adj = vf * group_size - nunits * vec_num;
7969 }
91ff1504 7970 else
b267968e
RB
7971 {
7972 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7973 group_gap_adj
7974 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7975 }
a70d6342 7976 }
ebfd146a 7977 else
9b999e8c 7978 vec_num = group_size;
44fc7854 7979
bffb8014 7980 ref_type = get_group_alias_ptr_type (first_stmt_info);
ebfd146a
IR
7981 }
7982 else
7983 {
bffb8014 7984 first_stmt_info = stmt_info;
ebfd146a
IR
7985 first_dr = dr;
7986 group_size = vec_num = 1;
9b999e8c 7987 group_gap_adj = 0;
44fc7854 7988 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7989 }
7990
720f5239 7991 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7992 gcc_assert (alignment_support_scheme);
70088b95
RS
7993 vec_loop_masks *loop_masks
7994 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7995 ? &LOOP_VINFO_MASKS (loop_vinfo)
7996 : NULL);
7cfb4d93
RS
7997 /* Targets with store-lane instructions must not require explicit
7998 realignment. vect_supportable_dr_alignment always returns either
7999 dr_aligned or dr_unaligned_supported for masked operations. */
8000 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8001 && !mask
70088b95 8002 && !loop_masks)
272c6793
RS
8003 || alignment_support_scheme == dr_aligned
8004 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
8005
8006 /* In case the vectorization factor (VF) is bigger than the number
8007 of elements that we can fit in a vectype (nunits), we have to generate
 8008 more than one vector stmt - i.e. we need to "unroll" the
ff802fa1 8009 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 8010 from one copy of the vector stmt to the next, in the field
ff802fa1 8011 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 8012 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
8013 stmts that use the defs of the current stmt. The example below
8014 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8015 need to create 4 vectorized stmts):
ebfd146a
IR
8016
8017 before vectorization:
8018 RELATED_STMT VEC_STMT
8019 S1: x = memref - -
8020 S2: z = x + 1 - -
8021
8022 step 1: vectorize stmt S1:
8023 We first create the vector stmt VS1_0, and, as usual, record a
8024 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8025 Next, we create the vector stmt VS1_1, and record a pointer to
8026 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 8027 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
8028 stmts and pointers:
8029 RELATED_STMT VEC_STMT
8030 VS1_0: vx0 = memref0 VS1_1 -
8031 VS1_1: vx1 = memref1 VS1_2 -
8032 VS1_2: vx2 = memref2 VS1_3 -
8033 VS1_3: vx3 = memref3 - -
8034 S1: x = load - VS1_0
8035 S2: z = x + 1 - -
8036
b8698a0f
L
 8037 See the documentation of vect_get_vec_def_for_stmt_copy for how the
 8038 information recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
8039 stmt S2. */
8040
0d0293ac 8041 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
8042
8043 S1: x2 = &base + 2
8044 S2: x0 = &base
8045 S3: x1 = &base + 1
8046 S4: x3 = &base + 3
8047
b8698a0f 8048 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
8049 starting from the access of the first stmt of the chain:
8050
8051 VS1: vx0 = &base
8052 VS2: vx1 = &base + vec_size*1
8053 VS3: vx3 = &base + vec_size*2
8054 VS4: vx4 = &base + vec_size*3
8055
8056 Then permutation statements are generated:
8057
e2c83630
RH
8058 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8059 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
8060 ...
8061
8062 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8063 (the order of the data-refs in the output of vect_permute_load_chain
8064 corresponds to the order of scalar stmts in the interleaving chain - see
8065 the documentation of vect_permute_load_chain()).
8066 The generation of permutation stmts and recording them in
0d0293ac 8067 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8068
b8698a0f 8069 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8070 permutation stmts above are created for every copy. The result vector
8071 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8072 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
8073
8074 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8075 on a target that supports unaligned accesses (dr_unaligned_supported)
8076 we generate the following code:
8077 p = initial_addr;
8078 indx = 0;
8079 loop {
8080 p = p + indx * vectype_size;
8081 vec_dest = *(p);
8082 indx = indx + 1;
8083 }
8084
8085 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8086 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8087 then generate the following code, in which the data in each iteration is
8088 obtained by two vector loads, one from the previous iteration, and one
8089 from the current iteration:
8090 p1 = initial_addr;
8091 msq_init = *(floor(p1))
8092 p2 = initial_addr + VS - 1;
8093 realignment_token = call target_builtin;
8094 indx = 0;
8095 loop {
8096 p2 = p2 + indx * vectype_size
8097 lsq = *(floor(p2))
8098 vec_dest = realign_load (msq, lsq, realignment_token)
8099 indx = indx + 1;
8100 msq = lsq;
8101 } */
8102
8103 /* If the misalignment remains the same throughout the execution of the
8104 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8105 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8106 This can only occur when vectorizing memory accesses in the inner-loop
8107 nested within an outer-loop that is being vectorized. */
8108
d1e4b493 8109 if (nested_in_vect_loop
cf098191
RS
8110 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8111 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8112 {
8113 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8114 compute_in_loop = true;
8115 }
8116
8117 if ((alignment_support_scheme == dr_explicit_realign_optimized
8118 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8119 && !compute_in_loop)
ebfd146a 8120 {
bffb8014 8121 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
ebfd146a
IR
8122 alignment_support_scheme, NULL_TREE,
8123 &at_loop);
8124 if (alignment_support_scheme == dr_explicit_realign_optimized)
8125 {
538dd0b7 8126 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8127 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8128 size_one_node);
ebfd146a
IR
8129 }
8130 }
8131 else
8132 at_loop = loop;
8133
62da9e14 8134 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8135 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8136
ab2fc782
RS
8137 tree bump;
8138 tree vec_offset = NULL_TREE;
8139 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8140 {
8141 aggr_type = NULL_TREE;
8142 bump = NULL_TREE;
8143 }
8144 else if (memory_access_type == VMAT_GATHER_SCATTER)
8145 {
8146 aggr_type = elem_type;
8147 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8148 &bump, &vec_offset);
8149 }
272c6793 8150 else
ab2fc782
RS
8151 {
8152 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8153 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8154 else
8155 aggr_type = vectype;
8156 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8157 }
272c6793 8158
c3a8f964 8159 tree vec_mask = NULL_TREE;
ebfd146a 8160 prev_stmt_info = NULL;
4d694b27 8161 poly_uint64 group_elt = 0;
ebfd146a 8162 for (j = 0; j < ncopies; j++)
b8698a0f 8163 {
e1bd7296 8164 stmt_vec_info new_stmt_info = NULL;
272c6793 8165 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8166 if (j == 0)
74bf76ed
JJ
8167 {
8168 bool simd_lane_access_p
8169 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8170 if (simd_lane_access_p
8171 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8172 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8173 && integer_zerop (DR_OFFSET (first_dr))
8174 && integer_zerop (DR_INIT (first_dr))
8175 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8176 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8177 && (alignment_support_scheme == dr_aligned
8178 || alignment_support_scheme == dr_unaligned_supported))
8179 {
8180 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8181 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8182 inv_p = false;
74bf76ed 8183 }
b9787581 8184 else if (first_stmt_info_for_drptr
bffb8014 8185 && first_stmt_info != first_stmt_info_for_drptr)
4f0a0218
RB
8186 {
8187 dataref_ptr
b9787581
RS
8188 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8189 aggr_type, at_loop, offset, &dummy,
8190 gsi, &ptr_incr, simd_lane_access_p,
ab2fc782 8191 &inv_p, byte_offset, bump);
4f0a0218
RB
8192 /* Adjust the pointer by the difference to first_stmt. */
8193 data_reference_p ptrdr
b9787581 8194 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
4f0a0218
RB
8195 tree diff = fold_convert (sizetype,
8196 size_binop (MINUS_EXPR,
8197 DR_INIT (first_dr),
8198 DR_INIT (ptrdr)));
8199 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8200 stmt, diff);
8201 }
bfaa08b7
RS
8202 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8203 {
8204 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8205 &dataref_ptr, &vec_offset);
8206 inv_p = false;
8207 }
74bf76ed
JJ
8208 else
8209 dataref_ptr
bffb8014 8210 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
74bf76ed 8211 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8212 simd_lane_access_p, &inv_p,
ab2fc782 8213 byte_offset, bump);
c3a8f964
RS
8214 if (mask)
8215 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8216 mask_vectype);
74bf76ed 8217 }
ebfd146a 8218 else
c3a8f964
RS
8219 {
8220 if (dataref_offset)
8221 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8222 bump);
bfaa08b7 8223 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8224 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8225 vec_offset);
c3a8f964 8226 else
ab2fc782
RS
8227 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8228 stmt, bump);
c3a8f964 8229 if (mask)
929b4411 8230 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8231 }
ebfd146a 8232
0d0293ac 8233 if (grouped_load || slp_perm)
9771b263 8234 dr_chain.create (vec_num);
5ce1ee7f 8235
2de001ee 8236 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8237 {
272c6793
RS
8238 tree vec_array;
8239
8240 vec_array = create_vector_array (vectype, vec_num);
8241
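	  /* A sketch of what the (masked) load-lanes emitted below computes,
	     for an assumed group of VEC_NUM = 2 V4SI vectors: eight ints
	     a0 b0 a1 b1 a2 b2 a3 b3 are read from consecutive memory and
	     de-interleaved into

		 vec_array[0] = { a0, a1, a2, a3 }
		 vec_array[1] = { b0, b1, b2, b3 }

	     so no separate permutation statements are needed.  */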
7cfb4d93 8242 tree final_mask = NULL_TREE;
70088b95
RS
8243 if (loop_masks)
8244 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8245 vectype, j);
7cfb4d93
RS
8246 if (vec_mask)
8247 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8248 vec_mask, gsi);
8249
7e11fc7f 8250 gcall *call;
7cfb4d93 8251 if (final_mask)
7e11fc7f
RS
8252 {
8253 /* Emit:
8254 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8255 VEC_MASK). */
8256 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8257 tree alias_ptr = build_int_cst (ref_type, align);
8258 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8259 dataref_ptr, alias_ptr,
7cfb4d93 8260 final_mask);
7e11fc7f
RS
8261 }
8262 else
8263 {
8264 /* Emit:
8265 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8266 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8267 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8268 }
a844293d
RS
8269 gimple_call_set_lhs (call, vec_array);
8270 gimple_call_set_nothrow (call, true);
e1bd7296 8271 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
ebfd146a 8272
272c6793
RS
8273 /* Extract each vector into an SSA_NAME. */
8274 for (i = 0; i < vec_num; i++)
ebfd146a 8275 {
272c6793
RS
8276 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8277 vec_array, i);
9771b263 8278 dr_chain.quick_push (new_temp);
272c6793
RS
8279 }
8280
8281 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8282 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8283
8284 /* Record that VEC_ARRAY is now dead. */
8285 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
8286 }
8287 else
8288 {
8289 for (i = 0; i < vec_num; i++)
8290 {
7cfb4d93 8291 tree final_mask = NULL_TREE;
70088b95 8292 if (loop_masks
7cfb4d93 8293 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8294 final_mask = vect_get_loop_mask (gsi, loop_masks,
8295 vec_num * ncopies,
7cfb4d93
RS
8296 vectype, vec_num * j + i);
8297 if (vec_mask)
8298 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8299 vec_mask, gsi);
8300
272c6793
RS
8301 if (i > 0)
8302 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8303 stmt, bump);
272c6793
RS
8304
8305 /* 2. Create the vector-load in the loop. */
e1bd7296 8306 gimple *new_stmt = NULL;
272c6793
RS
8307 switch (alignment_support_scheme)
8308 {
8309 case dr_aligned:
8310 case dr_unaligned_supported:
be1ac4ec 8311 {
644ffefd
MJ
8312 unsigned int align, misalign;
8313
bfaa08b7
RS
8314 if (memory_access_type == VMAT_GATHER_SCATTER)
8315 {
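		    /* Illustrative semantics of the gather emitted here
		       (a sketch only, not the exact internal-function
		       expansion): for each element I that the mask, if any,
		       enables, the value

			   result[I] = *(dataref_ptr + vec_offset[I] * scale)

		       is loaded; lanes the mask disables are not relied
		       upon by later statements.  */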
8316 tree scale = size_int (gs_info.scale);
8317 gcall *call;
70088b95 8318 if (loop_masks)
bfaa08b7
RS
8319 call = gimple_build_call_internal
8320 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8321 vec_offset, scale, final_mask);
8322 else
8323 call = gimple_build_call_internal
8324 (IFN_GATHER_LOAD, 3, dataref_ptr,
8325 vec_offset, scale);
8326 gimple_call_set_nothrow (call, true);
8327 new_stmt = call;
8328 data_ref = NULL_TREE;
8329 break;
8330 }
8331
f702e7d4 8332 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8333 if (alignment_support_scheme == dr_aligned)
8334 {
8335 gcc_assert (aligned_access_p (first_dr));
644ffefd 8336 misalign = 0;
272c6793
RS
8337 }
8338 else if (DR_MISALIGNMENT (first_dr) == -1)
8339 {
25f68d90 8340 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8341 misalign = 0;
272c6793
RS
8342 }
8343 else
c3a8f964 8344 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8345 if (dataref_offset == NULL_TREE
8346 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8347 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8348 align, misalign);
c3a8f964 8349
7cfb4d93 8350 if (final_mask)
c3a8f964
RS
8351 {
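			    /* The alignment actually known for the access is
			       the least set bit of MISALIGN | ALIGN; e.g.
			       (illustrative numbers) a target alignment of 16
			       with a known misalignment of 4 guarantees only
			       4-byte alignment.  That value, together with the
			       alias type, is passed to IFN_MASK_LOAD through
			       the pointer-constant operand built below.  */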
8352 align = least_bit_hwi (misalign | align);
8353 tree ptr = build_int_cst (ref_type, align);
8354 gcall *call
8355 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8356 dataref_ptr, ptr,
7cfb4d93 8357 final_mask);
c3a8f964
RS
8358 gimple_call_set_nothrow (call, true);
8359 new_stmt = call;
8360 data_ref = NULL_TREE;
8361 }
8362 else
8363 {
8364 data_ref
8365 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8366 dataref_offset
8367 ? dataref_offset
8368 : build_int_cst (ref_type, 0));
8369 if (alignment_support_scheme == dr_aligned)
8370 ;
8371 else if (DR_MISALIGNMENT (first_dr) == -1)
8372 TREE_TYPE (data_ref)
8373 = build_aligned_type (TREE_TYPE (data_ref),
8374 align * BITS_PER_UNIT);
8375 else
8376 TREE_TYPE (data_ref)
8377 = build_aligned_type (TREE_TYPE (data_ref),
8378 TYPE_ALIGN (elem_type));
8379 }
272c6793 8380 break;
be1ac4ec 8381 }
272c6793 8382 case dr_explicit_realign:
267d3070 8383 {
272c6793 8384 tree ptr, bump;
272c6793 8385
d88981fc 8386 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8387
8388 if (compute_in_loop)
bffb8014 8389 msq = vect_setup_realignment (first_stmt_info, gsi,
272c6793
RS
8390 &realignment_token,
8391 dr_explicit_realign,
8392 dataref_ptr, NULL);
8393
aed93b23
RB
8394 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8395 ptr = copy_ssa_name (dataref_ptr);
8396 else
8397 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8398 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8399 new_stmt = gimple_build_assign
8400 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8401 build_int_cst
8402 (TREE_TYPE (dataref_ptr),
f702e7d4 8403 -(HOST_WIDE_INT) align));
272c6793
RS
8404 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8405 data_ref
8406 = build2 (MEM_REF, vectype, ptr,
44fc7854 8407 build_int_cst (ref_type, 0));
19986382 8408 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8409 vec_dest = vect_create_destination_var (scalar_dest,
8410 vectype);
8411 new_stmt = gimple_build_assign (vec_dest, data_ref);
8412 new_temp = make_ssa_name (vec_dest, new_stmt);
8413 gimple_assign_set_lhs (new_stmt, new_temp);
8414 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8415 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8416 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8417 msq = new_temp;
8418
d88981fc 8419 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8420 TYPE_SIZE_UNIT (elem_type));
d88981fc 8421 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8422 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8423 new_stmt = gimple_build_assign
8424 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8425 build_int_cst
f702e7d4 8426 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8427 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8428 gimple_assign_set_lhs (new_stmt, ptr);
8429 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8430 data_ref
8431 = build2 (MEM_REF, vectype, ptr,
44fc7854 8432 build_int_cst (ref_type, 0));
272c6793 8433 break;
267d3070 8434 }
272c6793 8435 case dr_explicit_realign_optimized:
f702e7d4
RS
8436 {
8437 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8438 new_temp = copy_ssa_name (dataref_ptr);
8439 else
8440 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8441 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8442 new_stmt = gimple_build_assign
8443 (new_temp, BIT_AND_EXPR, dataref_ptr,
8444 build_int_cst (TREE_TYPE (dataref_ptr),
8445 -(HOST_WIDE_INT) align));
8446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8447 data_ref
8448 = build2 (MEM_REF, vectype, new_temp,
8449 build_int_cst (ref_type, 0));
8450 break;
8451 }
272c6793
RS
8452 default:
8453 gcc_unreachable ();
8454 }
ebfd146a 8455 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8456 /* DATA_REF is null if we've already built the statement. */
8457 if (data_ref)
19986382
RB
8458 {
8459 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8460 new_stmt = gimple_build_assign (vec_dest, data_ref);
8461 }
ebfd146a 8462 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8463 gimple_set_lhs (new_stmt, new_temp);
e1bd7296
RS
8464 new_stmt_info
8465 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8466
272c6793
RS
8467 /* 3. Handle explicit realignment if necessary/supported.
8468 Create in loop:
8469 vec_dest = realign_load (msq, lsq, realignment_token) */
8470 if (alignment_support_scheme == dr_explicit_realign_optimized
8471 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8472 {
272c6793
RS
8473 lsq = gimple_assign_lhs (new_stmt);
8474 if (!realignment_token)
8475 realignment_token = dataref_ptr;
8476 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8477 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8478 msq, lsq, realignment_token);
272c6793
RS
8479 new_temp = make_ssa_name (vec_dest, new_stmt);
8480 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296
RS
8481 new_stmt_info
8482 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
8483
8484 if (alignment_support_scheme == dr_explicit_realign_optimized)
8485 {
8486 gcc_assert (phi);
8487 if (i == vec_num - 1 && j == ncopies - 1)
8488 add_phi_arg (phi, lsq,
8489 loop_latch_edge (containing_loop),
9e227d60 8490 UNKNOWN_LOCATION);
272c6793
RS
8491 msq = lsq;
8492 }
ebfd146a 8493 }
ebfd146a 8494
59fd17e3
RB
8495 /* 4. Handle invariant-load. */
8496 if (inv_p && !bb_vinfo)
8497 {
59fd17e3 8498 gcc_assert (!grouped_load);
d1417442
JJ
8499 /* If we have versioned for aliasing or the loop doesn't
8500 have any data dependencies that would preclude this,
8501 then we are sure this is a loop invariant load and
8502 thus we can insert it on the preheader edge. */
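	      /* Sketch of the hoisting (illustrative only): for an invariant
		 scalar load X = *P the scalar RHS is copied to a new
		 statement TEM = *P inserted on the preheader edge, and the
		 splat { TEM, TEM, ... } is then built by vect_init_vector;
		 otherwise the splat of the scalar result is emitted just
		 after the unhoisted scalar load below.  */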
8503 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8504 && !nested_in_vect_loop
6b916b36 8505 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8506 {
8507 if (dump_enabled_p ())
8508 {
8509 dump_printf_loc (MSG_NOTE, vect_location,
8510 "hoisting out of the vectorized "
8511 "loop: ");
8512 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8513 }
b731b390 8514 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8515 gsi_insert_on_edge_immediate
8516 (loop_preheader_edge (loop),
8517 gimple_build_assign (tem,
8518 unshare_expr
8519 (gimple_assign_rhs1 (stmt))));
8520 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5 8521 new_stmt = SSA_NAME_DEF_STMT (new_temp);
e1bd7296 8522 new_stmt_info = vinfo->add_stmt (new_stmt);
a0e35eb0
RB
8523 }
8524 else
8525 {
8526 gimple_stmt_iterator gsi2 = *gsi;
8527 gsi_next (&gsi2);
8528 new_temp = vect_init_vector (stmt, scalar_dest,
8529 vectype, &gsi2);
e1bd7296 8530 new_stmt_info = vinfo->lookup_def (new_temp);
a0e35eb0 8531 }
59fd17e3
RB
8532 }
8533
62da9e14 8534 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8535 {
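	      /* A sketch of the reversal (element count assumed for the
		 example): for a V4SI vectype the data reference was
		 addressed with an offset of -(nunits - 1) elements earlier,
		 so the vector loaded here holds the elements in reverse
		 order and PERM_MASK is { 3, 2, 1, 0 }, i.e.

		     new_temp = VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>;  */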
aec7ae7d
JJ
8536 tree perm_mask = perm_mask_for_reverse (vectype);
8537 new_temp = permute_vec_elements (new_temp, new_temp,
8538 perm_mask, stmt, gsi);
e1bd7296 8539 new_stmt_info = vinfo->lookup_def (new_temp);
ebfd146a 8540 }
267d3070 8541
272c6793 8542 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8543 vect_transform_grouped_load (). */
8544 if (grouped_load || slp_perm)
9771b263 8545 dr_chain.quick_push (new_temp);
267d3070 8546
272c6793
RS
8547 /* Store vector loads in the corresponding SLP_NODE. */
8548 if (slp && !slp_perm)
e1bd7296 8549 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
b267968e
RB
8550
 8551 /* With an SLP permutation we load the gaps as well; without one
8552 we need to skip the gaps after we manage to fully load
2c53b149 8553 all elements. group_gap_adj is DR_GROUP_SIZE here. */
b267968e 8554 group_elt += nunits;
d9f21f6a
RS
8555 if (maybe_ne (group_gap_adj, 0U)
8556 && !slp_perm
8557 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8558 {
d9f21f6a
RS
8559 poly_wide_int bump_val
8560 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8561 * group_gap_adj);
8e6cdc90 8562 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8563 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8564 stmt, bump);
8565 group_elt = 0;
8566 }
272c6793 8567 }
9b999e8c
RB
8568 /* Bump the vector pointer to account for a gap or for excess
8569 elements loaded for a permuted SLP load. */
d9f21f6a 8570 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8571 {
d9f21f6a
RS
8572 poly_wide_int bump_val
8573 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8574 * group_gap_adj);
8e6cdc90 8575 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8576 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8577 stmt, bump);
8578 }
ebfd146a
IR
8579 }
8580
8581 if (slp && !slp_perm)
8582 continue;
8583
8584 if (slp_perm)
8585 {
29afecdf 8586 unsigned n_perms;
01d8bf07 8587 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8588 slp_node_instance, false,
8589 &n_perms))
ebfd146a 8590 {
9771b263 8591 dr_chain.release ();
ebfd146a
IR
8592 return false;
8593 }
8594 }
8595 else
8596 {
0d0293ac 8597 if (grouped_load)
ebfd146a 8598 {
2de001ee 8599 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8600 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8601 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8602 }
8603 else
8604 {
8605 if (j == 0)
e1bd7296 8606 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
ebfd146a 8607 else
e1bd7296
RS
8608 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8609 prev_stmt_info = new_stmt_info;
ebfd146a
IR
8610 }
8611 }
9771b263 8612 dr_chain.release ();
ebfd146a
IR
8613 }
8614
ebfd146a
IR
8615 return true;
8616}
8617
8618/* Function vect_is_simple_cond.
b8698a0f 8619
ebfd146a
IR
8620 Input:
 8621 VINFO - the vectorization info of the loop or basic block being vectorized.
8622 COND - Condition that is checked for simple use.
8623
e9e1d143
RG
8624 Output:
8625 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8626 *DTS - The def types for the arguments of the comparison
e9e1d143 8627
ebfd146a
IR
8628 Returns whether a COND can be vectorized. Checks whether
 8629 condition operands are supportable using vect_is_simple_use. */
8630
87aab9b2 8631static bool
4fc5ebf1 8632vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8633 tree *comp_vectype, enum vect_def_type *dts,
8634 tree vectype)
ebfd146a
IR
8635{
8636 tree lhs, rhs;
e9e1d143 8637 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8638
a414c77f
IE
8639 /* Mask case. */
8640 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8641 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f 8642 {
894dd753 8643 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
a414c77f
IE
8644 || !*comp_vectype
8645 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8646 return false;
8647 return true;
8648 }
8649
ebfd146a
IR
8650 if (!COMPARISON_CLASS_P (cond))
8651 return false;
8652
8653 lhs = TREE_OPERAND (cond, 0);
8654 rhs = TREE_OPERAND (cond, 1);
8655
8656 if (TREE_CODE (lhs) == SSA_NAME)
8657 {
894dd753 8658 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
ebfd146a
IR
8659 return false;
8660 }
4fc5ebf1
JG
8661 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8662 || TREE_CODE (lhs) == FIXED_CST)
8663 dts[0] = vect_constant_def;
8664 else
ebfd146a
IR
8665 return false;
8666
8667 if (TREE_CODE (rhs) == SSA_NAME)
8668 {
894dd753 8669 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
ebfd146a
IR
8670 return false;
8671 }
4fc5ebf1
JG
8672 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8673 || TREE_CODE (rhs) == FIXED_CST)
8674 dts[1] = vect_constant_def;
8675 else
ebfd146a
IR
8676 return false;
8677
28b33016 8678 if (vectype1 && vectype2
928686b1
RS
8679 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8680 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8681 return false;
8682
e9e1d143 8683 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8684 /* Invariant comparison. */
4515e413 8685 if (! *comp_vectype && vectype)
8da4c8d8
RB
8686 {
8687 tree scalar_type = TREE_TYPE (lhs);
8688 /* If we can widen the comparison to match vectype do so. */
8689 if (INTEGRAL_TYPE_P (scalar_type)
8690 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8691 TYPE_SIZE (TREE_TYPE (vectype))))
8692 scalar_type = build_nonstandard_integer_type
8693 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8694 TYPE_UNSIGNED (scalar_type));
8695 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8696 }
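 /* For instance (types assumed purely for illustration): selecting int
    values based on a comparison of two chars widens the comparison type
    to a 32-bit integer type first, so that COMP_VECTYPE ends up with the
    same number of same-sized elements as VECTYPE.  */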
8697
ebfd146a
IR
8698 return true;
8699}
8700
8701/* vectorizable_condition.
8702
b8698a0f
L
 8703 Check if STMT is a conditional modify expression that can be vectorized.
8704 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8705 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8706 at GSI.
8707
 8708 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 8709 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 0ad23163 8710 the else clause if it is 2).
ebfd146a
IR
8711
8712 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8713
4bbe8262 8714bool
355fe088 8715vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195
RS
8716 stmt_vec_info *vec_stmt, tree reduc_def,
8717 int reduc_index, slp_tree slp_node,
8718 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8719{
8720 tree scalar_dest = NULL_TREE;
8721 tree vec_dest = NULL_TREE;
01216d27
JJ
8722 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8723 tree then_clause, else_clause;
ebfd146a 8724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8725 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8726 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8727 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8728 tree vec_compare;
ebfd146a
IR
8729 tree new_temp;
8730 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8731 enum vect_def_type dts[4]
8732 = {vect_unknown_def_type, vect_unknown_def_type,
8733 vect_unknown_def_type, vect_unknown_def_type};
8734 int ndts = 4;
f7e531cf 8735 int ncopies;
01216d27 8736 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8737 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8738 int i, j;
8739 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8740 vec<tree> vec_oprnds0 = vNULL;
8741 vec<tree> vec_oprnds1 = vNULL;
8742 vec<tree> vec_oprnds2 = vNULL;
8743 vec<tree> vec_oprnds3 = vNULL;
74946978 8744 tree vec_cmp_type;
a414c77f 8745 bool masked = false;
b8698a0f 8746
f7e531cf
IR
8747 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8748 return false;
8749
bb6c2b68
RS
8750 vect_reduction_type reduction_type
8751 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8752 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8753 {
8754 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8755 return false;
ebfd146a 8756
af29617a
AH
8757 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8758 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8759 && reduc_def))
8760 return false;
ebfd146a 8761
af29617a
AH
8762 /* FORNOW: not yet supported. */
8763 if (STMT_VINFO_LIVE_P (stmt_info))
8764 {
8765 if (dump_enabled_p ())
8766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8767 "value used after loop.\n");
8768 return false;
8769 }
ebfd146a
IR
8770 }
8771
8772 /* Is vectorizable conditional operation? */
8773 if (!is_gimple_assign (stmt))
8774 return false;
8775
8776 code = gimple_assign_rhs_code (stmt);
8777
8778 if (code != COND_EXPR)
8779 return false;
8780
465c8c19 8781 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8782 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8783
fce57248 8784 if (slp_node)
465c8c19
JJ
8785 ncopies = 1;
8786 else
e8f142e2 8787 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8788
8789 gcc_assert (ncopies >= 1);
8790 if (reduc_index && ncopies > 1)
8791 return false; /* FORNOW */
8792
4e71066d
RG
8793 cond_expr = gimple_assign_rhs1 (stmt);
8794 then_clause = gimple_assign_rhs2 (stmt);
8795 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8796
4fc5ebf1 8797 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8798 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8799 || !comp_vectype)
ebfd146a
IR
8800 return false;
8801
894dd753 8802 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
2947d3b2 8803 return false;
894dd753 8804 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
ebfd146a 8805 return false;
2947d3b2
IE
8806
8807 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8808 return false;
8809
8810 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8811 return false;
8812
28b33016
IE
8813 masked = !COMPARISON_CLASS_P (cond_expr);
8814 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8815
74946978
MP
8816 if (vec_cmp_type == NULL_TREE)
8817 return false;
784fb9b3 8818
01216d27
JJ
8819 cond_code = TREE_CODE (cond_expr);
8820 if (!masked)
8821 {
8822 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8823 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8824 }
8825
8826 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8827 {
8828 /* Boolean values may have another representation in vectors
8829 and therefore we prefer bit operations over comparison for
8830 them (which also works for scalar masks). We store opcodes
8831 to use in bitop1 and bitop2. Statement is vectorized as
8832 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8833 depending on bitop1 and bitop2 arity. */
8834 switch (cond_code)
8835 {
8836 case GT_EXPR:
8837 bitop1 = BIT_NOT_EXPR;
8838 bitop2 = BIT_AND_EXPR;
8839 break;
8840 case GE_EXPR:
8841 bitop1 = BIT_NOT_EXPR;
8842 bitop2 = BIT_IOR_EXPR;
8843 break;
8844 case LT_EXPR:
8845 bitop1 = BIT_NOT_EXPR;
8846 bitop2 = BIT_AND_EXPR;
8847 std::swap (cond_expr0, cond_expr1);
8848 break;
8849 case LE_EXPR:
8850 bitop1 = BIT_NOT_EXPR;
8851 bitop2 = BIT_IOR_EXPR;
8852 std::swap (cond_expr0, cond_expr1);
8853 break;
8854 case NE_EXPR:
8855 bitop1 = BIT_XOR_EXPR;
8856 break;
8857 case EQ_EXPR:
8858 bitop1 = BIT_XOR_EXPR;
8859 bitop2 = BIT_NOT_EXPR;
8860 break;
8861 default:
8862 return false;
8863 }
8864 cond_code = SSA_NAME;
8865 }
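  /* For example (a sketch of the mapping above, not an additional code
     path): with boolean operands A > B becomes A & ~B (bitop1 =
     BIT_NOT_EXPR applied to B, bitop2 = BIT_AND_EXPR), and A == B is
     handled as ~(A ^ B), which the transform below implements by
     computing A ^ B and swapping the then/else clauses.  */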
8866
b8698a0f 8867 if (!vec_stmt)
ebfd146a 8868 {
01216d27
JJ
8869 if (bitop1 != NOP_EXPR)
8870 {
8871 machine_mode mode = TYPE_MODE (comp_vectype);
8872 optab optab;
8873
8874 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8875 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8876 return false;
8877
8878 if (bitop2 != NOP_EXPR)
8879 {
8880 optab = optab_for_tree_code (bitop2, comp_vectype,
8881 optab_default);
8882 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8883 return false;
8884 }
8885 }
4fc5ebf1
JG
8886 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8887 cond_code))
8888 {
68435eb2
RB
8889 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8890 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8891 cost_vec);
4fc5ebf1
JG
8892 return true;
8893 }
8894 return false;
ebfd146a
IR
8895 }
8896
f7e531cf
IR
8897 /* Transform. */
8898
8899 if (!slp_node)
8900 {
9771b263
DN
8901 vec_oprnds0.create (1);
8902 vec_oprnds1.create (1);
8903 vec_oprnds2.create (1);
8904 vec_oprnds3.create (1);
f7e531cf 8905 }
ebfd146a
IR
8906
8907 /* Handle def. */
8908 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8909 if (reduction_type != EXTRACT_LAST_REDUCTION)
8910 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8911
8912 /* Handle cond expr. */
a855b1b1
MM
8913 for (j = 0; j < ncopies; j++)
8914 {
e1bd7296 8915 stmt_vec_info new_stmt_info = NULL;
a855b1b1
MM
8916 if (j == 0)
8917 {
f7e531cf
IR
8918 if (slp_node)
8919 {
00f96dc9
TS
8920 auto_vec<tree, 4> ops;
8921 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8922
a414c77f 8923 if (masked)
01216d27 8924 ops.safe_push (cond_expr);
a414c77f
IE
8925 else
8926 {
01216d27
JJ
8927 ops.safe_push (cond_expr0);
8928 ops.safe_push (cond_expr1);
a414c77f 8929 }
9771b263
DN
8930 ops.safe_push (then_clause);
8931 ops.safe_push (else_clause);
306b0c92 8932 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8933 vec_oprnds3 = vec_defs.pop ();
8934 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8935 if (!masked)
8936 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8937 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8938 }
8939 else
8940 {
a414c77f
IE
8941 if (masked)
8942 {
8943 vec_cond_lhs
8944 = vect_get_vec_def_for_operand (cond_expr, stmt,
8945 comp_vectype);
894dd753 8946 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
a414c77f
IE
8947 }
8948 else
8949 {
01216d27
JJ
8950 vec_cond_lhs
8951 = vect_get_vec_def_for_operand (cond_expr0,
8952 stmt, comp_vectype);
894dd753 8953 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
01216d27
JJ
8954
8955 vec_cond_rhs
8956 = vect_get_vec_def_for_operand (cond_expr1,
8957 stmt, comp_vectype);
894dd753 8958 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
a414c77f 8959 }
f7e531cf
IR
8960 if (reduc_index == 1)
8961 vec_then_clause = reduc_def;
8962 else
8963 {
8964 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241 8965 stmt);
894dd753 8966 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
f7e531cf
IR
8967 }
8968 if (reduc_index == 2)
8969 vec_else_clause = reduc_def;
8970 else
8971 {
8972 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241 8973 stmt);
894dd753 8974 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
f7e531cf 8975 }
a855b1b1
MM
8976 }
8977 }
8978 else
8979 {
a414c77f
IE
8980 vec_cond_lhs
8981 = vect_get_vec_def_for_stmt_copy (dts[0],
8982 vec_oprnds0.pop ());
8983 if (!masked)
8984 vec_cond_rhs
8985 = vect_get_vec_def_for_stmt_copy (dts[1],
8986 vec_oprnds1.pop ());
8987
a855b1b1 8988 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8989 vec_oprnds2.pop ());
a855b1b1 8990 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8991 vec_oprnds3.pop ());
f7e531cf
IR
8992 }
8993
8994 if (!slp_node)
8995 {
9771b263 8996 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8997 if (!masked)
8998 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8999 vec_oprnds2.quick_push (vec_then_clause);
9000 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
9001 }
9002
9dc3f7de 9003 /* Arguments are ready. Create the new vector stmt. */
9771b263 9004 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 9005 {
9771b263
DN
9006 vec_then_clause = vec_oprnds2[i];
9007 vec_else_clause = vec_oprnds3[i];
a855b1b1 9008
a414c77f
IE
9009 if (masked)
9010 vec_compare = vec_cond_lhs;
9011 else
9012 {
9013 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
9014 if (bitop1 == NOP_EXPR)
9015 vec_compare = build2 (cond_code, vec_cmp_type,
9016 vec_cond_lhs, vec_cond_rhs);
9017 else
9018 {
9019 new_temp = make_ssa_name (vec_cmp_type);
e1bd7296 9020 gassign *new_stmt;
01216d27
JJ
9021 if (bitop1 == BIT_NOT_EXPR)
9022 new_stmt = gimple_build_assign (new_temp, bitop1,
9023 vec_cond_rhs);
9024 else
9025 new_stmt
9026 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9027 vec_cond_rhs);
9028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9029 if (bitop2 == NOP_EXPR)
9030 vec_compare = new_temp;
9031 else if (bitop2 == BIT_NOT_EXPR)
9032 {
9033 /* Instead of doing ~x ? y : z do x ? z : y. */
9034 vec_compare = new_temp;
9035 std::swap (vec_then_clause, vec_else_clause);
9036 }
9037 else
9038 {
9039 vec_compare = make_ssa_name (vec_cmp_type);
9040 new_stmt
9041 = gimple_build_assign (vec_compare, bitop2,
9042 vec_cond_lhs, new_temp);
9043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9044 }
9045 }
a414c77f 9046 }
bb6c2b68
RS
9047 if (reduction_type == EXTRACT_LAST_REDUCTION)
9048 {
9049 if (!is_gimple_val (vec_compare))
9050 {
9051 tree vec_compare_name = make_ssa_name (vec_cmp_type);
e1bd7296
RS
9052 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9053 vec_compare);
bb6c2b68
RS
9054 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9055 vec_compare = vec_compare_name;
9056 }
9057 gcc_assert (reduc_index == 2);
e1bd7296 9058 gcall *new_stmt = gimple_build_call_internal
bb6c2b68
RS
9059 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9060 vec_then_clause);
9061 gimple_call_set_lhs (new_stmt, scalar_dest);
9062 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9063 if (stmt == gsi_stmt (*gsi))
e1bd7296 9064 new_stmt_info = vect_finish_replace_stmt (stmt, new_stmt);
bb6c2b68
RS
9065 else
9066 {
9067 /* In this case we're moving the definition to later in the
9068 block. That doesn't matter because the only uses of the
9069 lhs are in phi statements. */
9070 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9071 gsi_remove (&old_gsi, true);
e1bd7296
RS
9072 new_stmt_info
9073 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
bb6c2b68
RS
9074 }
9075 }
9076 else
9077 {
9078 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
9079 gassign *new_stmt
9080 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9081 vec_then_clause, vec_else_clause);
9082 new_stmt_info
9083 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
bb6c2b68 9084 }
f7e531cf 9085 if (slp_node)
e1bd7296 9086 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
f7e531cf
IR
9087 }
9088
9089 if (slp_node)
9090 continue;
9091
e1bd7296
RS
9092 if (j == 0)
9093 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9094 else
9095 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
f7e531cf 9096
e1bd7296 9097 prev_stmt_info = new_stmt_info;
a855b1b1 9098 }
b8698a0f 9099
9771b263
DN
9100 vec_oprnds0.release ();
9101 vec_oprnds1.release ();
9102 vec_oprnds2.release ();
9103 vec_oprnds3.release ();
f7e531cf 9104
ebfd146a
IR
9105 return true;
9106}
9107
42fd8198
IE
9108/* vectorizable_comparison.
9109
 9110 Check if STMT is a comparison expression that can be vectorized.
9111 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9112 comparison, put it in VEC_STMT, and insert it at GSI.
9113
9114 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9115
fce57248 9116static bool
42fd8198 9117vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 9118 stmt_vec_info *vec_stmt, tree reduc_def,
68435eb2 9119 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9120{
9121 tree lhs, rhs1, rhs2;
9122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9123 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9124 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9125 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9126 tree new_temp;
9127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9128 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9129 int ndts = 2;
928686b1 9130 poly_uint64 nunits;
42fd8198 9131 int ncopies;
49e76ff1 9132 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9133 stmt_vec_info prev_stmt_info = NULL;
9134 int i, j;
9135 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9136 vec<tree> vec_oprnds0 = vNULL;
9137 vec<tree> vec_oprnds1 = vNULL;
42fd8198
IE
9138 tree mask_type;
9139 tree mask;
9140
c245362b
IE
9141 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9142 return false;
9143
30480bcd 9144 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9145 return false;
9146
9147 mask_type = vectype;
9148 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9149
fce57248 9150 if (slp_node)
42fd8198
IE
9151 ncopies = 1;
9152 else
e8f142e2 9153 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9154
9155 gcc_assert (ncopies >= 1);
42fd8198
IE
9156 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9157 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9158 && reduc_def))
9159 return false;
9160
9161 if (STMT_VINFO_LIVE_P (stmt_info))
9162 {
9163 if (dump_enabled_p ())
9164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9165 "value used after loop.\n");
9166 return false;
9167 }
9168
9169 if (!is_gimple_assign (stmt))
9170 return false;
9171
9172 code = gimple_assign_rhs_code (stmt);
9173
9174 if (TREE_CODE_CLASS (code) != tcc_comparison)
9175 return false;
9176
9177 rhs1 = gimple_assign_rhs1 (stmt);
9178 rhs2 = gimple_assign_rhs2 (stmt);
9179
894dd753 9180 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
42fd8198
IE
9181 return false;
9182
894dd753 9183 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
42fd8198
IE
9184 return false;
9185
9186 if (vectype1 && vectype2
928686b1
RS
9187 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9188 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9189 return false;
9190
9191 vectype = vectype1 ? vectype1 : vectype2;
9192
9193 /* Invariant comparison. */
9194 if (!vectype)
9195 {
69a9a66f 9196 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9197 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9198 return false;
9199 }
928686b1 9200 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9201 return false;
9202
49e76ff1
IE
9203 /* Can't compare mask and non-mask types. */
9204 if (vectype1 && vectype2
9205 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9206 return false;
9207
9208 /* Boolean values may have another representation in vectors
9209 and therefore we prefer bit operations over comparison for
9210 them (which also works for scalar masks). We store opcodes
9211 to use in bitop1 and bitop2. Statement is vectorized as
9212 BITOP2 (rhs1 BITOP1 rhs2) or
9213 rhs1 BITOP2 (BITOP1 rhs2)
9214 depending on bitop1 and bitop2 arity. */
9215 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9216 {
9217 if (code == GT_EXPR)
9218 {
9219 bitop1 = BIT_NOT_EXPR;
9220 bitop2 = BIT_AND_EXPR;
9221 }
9222 else if (code == GE_EXPR)
9223 {
9224 bitop1 = BIT_NOT_EXPR;
9225 bitop2 = BIT_IOR_EXPR;
9226 }
9227 else if (code == LT_EXPR)
9228 {
9229 bitop1 = BIT_NOT_EXPR;
9230 bitop2 = BIT_AND_EXPR;
9231 std::swap (rhs1, rhs2);
264d951a 9232 std::swap (dts[0], dts[1]);
49e76ff1
IE
9233 }
9234 else if (code == LE_EXPR)
9235 {
9236 bitop1 = BIT_NOT_EXPR;
9237 bitop2 = BIT_IOR_EXPR;
9238 std::swap (rhs1, rhs2);
264d951a 9239 std::swap (dts[0], dts[1]);
49e76ff1
IE
9240 }
9241 else
9242 {
9243 bitop1 = BIT_XOR_EXPR;
9244 if (code == EQ_EXPR)
9245 bitop2 = BIT_NOT_EXPR;
9246 }
9247 }
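  /* A concrete instance of the mapping above (sketch only): for mask
     operands A <= B the operands and their def types are swapped and the
     result is computed as B | ~A, which is true unless A is set and B is
     clear.  */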
9248
42fd8198
IE
9249 if (!vec_stmt)
9250 {
49e76ff1 9251 if (bitop1 == NOP_EXPR)
68435eb2
RB
9252 {
9253 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9254 return false;
9255 }
49e76ff1
IE
9256 else
9257 {
9258 machine_mode mode = TYPE_MODE (vectype);
9259 optab optab;
9260
9261 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9262 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9263 return false;
9264
9265 if (bitop2 != NOP_EXPR)
9266 {
9267 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9268 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9269 return false;
9270 }
49e76ff1 9271 }
68435eb2
RB
9272
9273 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9274 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9275 dts, ndts, slp_node, cost_vec);
9276 return true;
42fd8198
IE
9277 }
9278
9279 /* Transform. */
9280 if (!slp_node)
9281 {
9282 vec_oprnds0.create (1);
9283 vec_oprnds1.create (1);
9284 }
9285
9286 /* Handle def. */
9287 lhs = gimple_assign_lhs (stmt);
9288 mask = vect_create_destination_var (lhs, mask_type);
9289
9290 /* Handle cmp expr. */
9291 for (j = 0; j < ncopies; j++)
9292 {
e1bd7296 9293 stmt_vec_info new_stmt_info = NULL;
42fd8198
IE
9294 if (j == 0)
9295 {
9296 if (slp_node)
9297 {
9298 auto_vec<tree, 2> ops;
9299 auto_vec<vec<tree>, 2> vec_defs;
9300
9301 ops.safe_push (rhs1);
9302 ops.safe_push (rhs2);
306b0c92 9303 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9304 vec_oprnds1 = vec_defs.pop ();
9305 vec_oprnds0 = vec_defs.pop ();
9306 }
9307 else
9308 {
e4af0bc4
IE
9309 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9310 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9311 }
9312 }
9313 else
9314 {
9315 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9316 vec_oprnds0.pop ());
9317 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9318 vec_oprnds1.pop ());
9319 }
9320
9321 if (!slp_node)
9322 {
9323 vec_oprnds0.quick_push (vec_rhs1);
9324 vec_oprnds1.quick_push (vec_rhs2);
9325 }
9326
9327 /* Arguments are ready. Create the new vector stmt. */
9328 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9329 {
9330 vec_rhs2 = vec_oprnds1[i];
9331
9332 new_temp = make_ssa_name (mask);
49e76ff1
IE
9333 if (bitop1 == NOP_EXPR)
9334 {
e1bd7296
RS
9335 gassign *new_stmt = gimple_build_assign (new_temp, code,
9336 vec_rhs1, vec_rhs2);
9337 new_stmt_info
9338 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
49e76ff1
IE
9339 }
9340 else
9341 {
e1bd7296 9342 gassign *new_stmt;
49e76ff1
IE
9343 if (bitop1 == BIT_NOT_EXPR)
9344 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9345 else
9346 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9347 vec_rhs2);
e1bd7296
RS
9348 new_stmt_info
9349 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
49e76ff1
IE
9350 if (bitop2 != NOP_EXPR)
9351 {
9352 tree res = make_ssa_name (mask);
9353 if (bitop2 == BIT_NOT_EXPR)
9354 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9355 else
9356 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9357 new_temp);
e1bd7296
RS
9358 new_stmt_info
9359 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
49e76ff1
IE
9360 }
9361 }
42fd8198 9362 if (slp_node)
e1bd7296 9363 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
42fd8198
IE
9364 }
9365
9366 if (slp_node)
9367 continue;
9368
9369 if (j == 0)
e1bd7296 9370 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
42fd8198 9371 else
e1bd7296 9372 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
42fd8198 9373
e1bd7296 9374 prev_stmt_info = new_stmt_info;
9375 }
9376
9377 vec_oprnds0.release ();
9378 vec_oprnds1.release ();
9379
9380 return true;
9381}
ebfd146a 9382
9383/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9384 can handle all live statements in the node. Otherwise return true
9385 if STMT is not live or if vectorizable_live_operation can handle it.
9386 GSI and VEC_STMT are as for vectorizable_live_operation. */
9387
9388static bool
9389can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 9390 slp_tree slp_node, stmt_vec_info *vec_stmt,
68435eb2 9391 stmt_vector_for_cost *cost_vec)
9392{
9393 if (slp_node)
9394 {
b9787581 9395 stmt_vec_info slp_stmt_info;
68a0f2ff 9396 unsigned int i;
b9787581 9397 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
68a0f2ff 9398 {
68a0f2ff 9399 if (STMT_VINFO_LIVE_P (slp_stmt_info)
b9787581 9400 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
68435eb2 9401 vec_stmt, cost_vec))
9402 return false;
9403 }
9404 }
9405 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9406 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9407 cost_vec))
9408 return false;
9409
9410 return true;
9411}
9412
8644a673 9413/* Make sure the statement is vectorizable. */
9414
9415bool
891ad31c 9416vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9417 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9418{
8644a673 9419 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6585ff8f 9420 vec_info *vinfo = stmt_info->vinfo;
a70d6342 9421 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9422 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9423 bool ok;
363477c0 9424 gimple_seq pattern_def_seq;
ebfd146a 9425
73fbfcad 9426 if (dump_enabled_p ())
ebfd146a 9427 {
9428 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9429 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9430 }
ebfd146a 9431
1825a1f3 9432 if (gimple_has_volatile_ops (stmt))
b8698a0f 9433 {
73fbfcad 9434 if (dump_enabled_p ())
78c60e3d 9435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9436 "not vectorized: stmt has volatile operands\n");
9437
9438 return false;
9439 }
b8698a0f 9440
9441 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9442 && node == NULL
9443 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9444 {
9445 gimple_stmt_iterator si;
9446
9447 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9448 {
9449 gimple *pattern_def_stmt = gsi_stmt (si);
9450 stmt_vec_info pattern_def_stmt_info
9451 = vinfo->lookup_stmt (gsi_stmt (si));
9452 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9453 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9454 {
9455 /* Analyze def stmt of STMT if it's a pattern stmt. */
9456 if (dump_enabled_p ())
9457 {
9458 dump_printf_loc (MSG_NOTE, vect_location,
9459 "==> examining pattern def statement: ");
9460 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9461 }
9462
9463 if (!vect_analyze_stmt (pattern_def_stmt,
9464 need_to_vectorize, node, node_instance,
9465 cost_vec))
9466 return false;
9467 }
9468 }
9469 }
9470
b8698a0f 9471 /* Skip stmts that do not need to be vectorized. In loops this is expected
9472 to include:
9473 - the COND_EXPR which is the loop exit condition
9474 - any LABEL_EXPRs in the loop
b8698a0f 9475 - computations that are used only for array indexing or loop control.
8644a673 9476 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9477 instance, therefore, all the statements are relevant.
ebfd146a 9478
d092494c 9479 Pattern statement needs to be analyzed instead of the original statement
83197f37 9480 if the original statement is not relevant. Otherwise, we analyze both
9481 statements. In basic blocks we are called from some SLP instance
9482 traversal; don't analyze pattern stmts instead, since the pattern stmts
9483 will already be part of an SLP instance. */
83197f37 9484
10681ce8 9485 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9486 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9487 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9488 {
9d5e7640 9489 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9490 && pattern_stmt_info
9491 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9492 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9d5e7640 9493 {
83197f37 9494 /* Analyze PATTERN_STMT instead of the original stmt. */
9495 stmt = pattern_stmt_info->stmt;
9496 stmt_info = pattern_stmt_info;
73fbfcad 9497 if (dump_enabled_p ())
9d5e7640 9498 {
9499 dump_printf_loc (MSG_NOTE, vect_location,
9500 "==> examining pattern statement: ");
9501 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9502 }
9503 }
9504 else
9505 {
73fbfcad 9506 if (dump_enabled_p ())
e645e942 9507 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9508
9509 return true;
9510 }
8644a673 9511 }
83197f37 9512 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9513 && node == NULL
9514 && pattern_stmt_info
9515 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9516 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9517 {
9518 /* Analyze PATTERN_STMT too. */
73fbfcad 9519 if (dump_enabled_p ())
83197f37 9520 {
9521 dump_printf_loc (MSG_NOTE, vect_location,
9522 "==> examining pattern statement: ");
9523 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9524 }
9525
10681ce8 9526 if (!vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
68435eb2 9527 node_instance, cost_vec))
9528 return false;
9529 }
ebfd146a 9530
9531 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9532 {
9533 case vect_internal_def:
9534 break;
ebfd146a 9535
8644a673 9536 case vect_reduction_def:
7c5222ff 9537 case vect_nested_cycle:
9538 gcc_assert (!bb_vinfo
9539 && (relevance == vect_used_in_outer
9540 || relevance == vect_used_in_outer_by_reduction
9541 || relevance == vect_used_by_reduction
9542 || relevance == vect_unused_in_scope
9543 || relevance == vect_used_only_live));
9544 break;
9545
9546 case vect_induction_def:
9547 gcc_assert (!bb_vinfo);
9548 break;
9549
9550 case vect_constant_def:
9551 case vect_external_def:
9552 case vect_unknown_def_type:
9553 default:
9554 gcc_unreachable ();
9555 }
ebfd146a 9556
8644a673 9557 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9558 {
8644a673 9559 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9560 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9561 || (is_gimple_call (stmt)
9562 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9563 *need_to_vectorize = true;
9564 }
9565
9566 if (PURE_SLP_STMT (stmt_info) && !node)
9567 {
9568 dump_printf_loc (MSG_NOTE, vect_location,
9569 "handled only by SLP analysis\n");
9570 return true;
9571 }
9572
9573 ok = true;
9574 if (!bb_vinfo
9575 && (STMT_VINFO_RELEVANT_P (stmt_info)
9576 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9577 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9578 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9579 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9580 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9581 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9582 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9583 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9584 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9585 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9586 cost_vec)
9587 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9588 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9589 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
9590 else
9591 {
9592 if (bb_vinfo)
9593 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9594 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9595 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9596 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9597 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9598 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9599 cost_vec)
9600 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9601 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9602 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9603 cost_vec)
9604 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9605 cost_vec));
b1af7da6 9606 }
9607
9608 if (!ok)
ebfd146a 9609 {
73fbfcad 9610 if (dump_enabled_p ())
8644a673 9611 {
9612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9613 "not vectorized: relevant stmt not ");
9614 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9615 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9616 }
b8698a0f 9617
9618 return false;
9619 }
9620
9621 /* Stmts that are (also) "live" (i.e. used outside the loop)
9622 need extra handling, except for vectorizable reductions. */
9623 if (!bb_vinfo
9624 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9625 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9626 {
73fbfcad 9627 if (dump_enabled_p ())
8644a673 9628 {
78c60e3d 9629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9630 "not vectorized: live stmt not supported: ");
78c60e3d 9631 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9632 }
b8698a0f 9633
8644a673 9634 return false;
9635 }
9636
9637 return true;
9638}
9639
9640
9641/* Function vect_transform_stmt.
9642
9643 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9644
9645bool
355fe088 9646vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9647 bool *grouped_store, slp_tree slp_node,
9648 slp_instance slp_node_instance)
9649{
9650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9651 vec_info *vinfo = stmt_info->vinfo;
ebfd146a 9652 bool is_store = false;
1eede195 9653 stmt_vec_info vec_stmt = NULL;
ebfd146a 9654 bool done;
ebfd146a 9655
fce57248 9656 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
1eede195 9657 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9658
9659 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9660 && nested_in_vect_loop_p
9661 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9662 stmt));
9663
9664 switch (STMT_VINFO_TYPE (stmt_info))
9665 {
9666 case type_demotion_vec_info_type:
ebfd146a 9667 case type_promotion_vec_info_type:
ebfd146a 9668 case type_conversion_vec_info_type:
68435eb2 9669 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
9670 gcc_assert (done);
9671 break;
9672
9673 case induc_vec_info_type:
68435eb2 9674 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
9675 gcc_assert (done);
9676 break;
9677
9dc3f7de 9678 case shift_vec_info_type:
68435eb2 9679 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9680 gcc_assert (done);
9681 break;
9682
ebfd146a 9683 case op_vec_info_type:
68435eb2 9684 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
9685 gcc_assert (done);
9686 break;
9687
9688 case assignment_vec_info_type:
68435eb2 9689 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
9690 gcc_assert (done);
9691 break;
9692
9693 case load_vec_info_type:
b8698a0f 9694 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9695 slp_node_instance, NULL);
9696 gcc_assert (done);
9697 break;
9698
9699 case store_vec_info_type:
68435eb2 9700 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9701 gcc_assert (done);
0d0293ac 9702 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9703 {
9704 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9705 last store in the chain is reached. Store stmts before the last
9706 one are skipped, and their vec_stmt_info shouldn't be freed
9707 meanwhile. */
0d0293ac 9708 *grouped_store = true;
bffb8014 9709 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2c53b149 9710 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
ebfd146a 9711 is_store = true;
f307441a 9712 }
9713 else
9714 is_store = true;
9715 break;
9716
9717 case condition_vec_info_type:
68435eb2 9718 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
9719 gcc_assert (done);
9720 break;
9721
42fd8198 9722 case comparison_vec_info_type:
68435eb2 9723 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
9724 gcc_assert (done);
9725 break;
9726
ebfd146a 9727 case call_vec_info_type:
68435eb2 9728 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9729 stmt = gsi_stmt (*gsi);
9730 break;
9731
0136f8f0 9732 case call_simd_clone_vec_info_type:
68435eb2 9733 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9734 stmt = gsi_stmt (*gsi);
9735 break;
9736
ebfd146a 9737 case reduc_vec_info_type:
891ad31c 9738 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9739 slp_node_instance, NULL);
9740 gcc_assert (done);
9741 break;
9742
9743 default:
9744 if (!STMT_VINFO_LIVE_P (stmt_info))
9745 {
73fbfcad 9746 if (dump_enabled_p ())
78c60e3d 9747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9748 "stmt not supported.\n");
9749 gcc_unreachable ();
9750 }
9751 }
9752
9753 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9754 This would break hybrid SLP vectorization. */
9755 if (slp_node)
d90f8440 9756 gcc_assert (!vec_stmt
1eede195 9757 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
225ce44b 9758
9759 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9760 is being vectorized, but outside the immediately enclosing loop. */
9761 if (vec_stmt
e57d9a82 9762 && nested_p
9763 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9764 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9765 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9766 vect_used_in_outer_by_reduction))
ebfd146a 9767 {
9768 struct loop *innerloop = LOOP_VINFO_LOOP (
9769 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9770 imm_use_iterator imm_iter;
9771 use_operand_p use_p;
9772 tree scalar_dest;
ebfd146a 9773
73fbfcad 9774 if (dump_enabled_p ())
78c60e3d 9775 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9776 "Record the vdef for outer-loop vectorization.\n");
9777
9778 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9779 (to be used when vectorizing outer-loop stmts that use the DEF of
9780 STMT). */
9781 if (gimple_code (stmt) == GIMPLE_PHI)
9782 scalar_dest = PHI_RESULT (stmt);
9783 else
9784 scalar_dest = gimple_assign_lhs (stmt);
9785
9786 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9787 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9788 {
9789 stmt_vec_info exit_phi_info
9790 = vinfo->lookup_stmt (USE_STMT (use_p));
9791 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9792 }
9793 }
9794
9795 /* Handle stmts whose DEF is used outside the loop-nest that is
9796 being vectorized. */
68a0f2ff 9797 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9798 {
68435eb2 9799 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
9800 gcc_assert (done);
9801 }
9802
9803 if (vec_stmt)
83197f37 9804 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9805
b8698a0f 9806 return is_store;
9807}
9808
9809
b8698a0f 9810/* Remove a group of stores (for SLP or interleaving), free their
9811 stmt_vec_info. */
9812
9813void
355fe088 9814vect_remove_stores (gimple *first_stmt)
ebfd146a 9815{
355fe088 9816 gimple *next = first_stmt;
9817 gimple_stmt_iterator next_si;
9818
9819 while (next)
9820 {
9821 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9822
bffb8014 9823 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
9824 if (is_pattern_stmt_p (stmt_info))
9825 next = STMT_VINFO_RELATED_STMT (stmt_info);
9826 /* Free the attached stmt_vec_info and remove the stmt. */
9827 next_si = gsi_for_stmt (next);
3d3f2249 9828 unlink_stmt_vdef (next);
ebfd146a 9829 gsi_remove (&next_si, true);
3d3f2249 9830 release_defs (next);
9831 free_stmt_vec_info (next);
9832 next = tmp;
9833 }
9834}
9835
9836
9837/* Function new_stmt_vec_info.
9838
9839 Create and initialize a new stmt_vec_info struct for STMT. */
9840
9841stmt_vec_info
310213d4 9842new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9843{
9844 stmt_vec_info res;
dbe1b846 9845 res = (_stmt_vec_info *) xcalloc (1, sizeof (struct _stmt_vec_info));
9846
9847 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9848 STMT_VINFO_STMT (res) = stmt;
310213d4 9849 res->vinfo = vinfo;
8644a673 9850 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9851 STMT_VINFO_LIVE_P (res) = false;
9852 STMT_VINFO_VECTYPE (res) = NULL;
9853 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9854 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a 9855 STMT_VINFO_IN_PATTERN_P (res) = false;
363477c0 9856 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9857 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9858 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9859 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9860
9861 if (gimple_code (stmt) == GIMPLE_PHI
9862 && is_loop_header_bb_p (gimple_bb (stmt)))
9863 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9864 else
9865 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9866
9771b263 9867 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9868 STMT_SLP_TYPE (res) = loop_vect;
9869 STMT_VINFO_NUM_SLP_USES (res) = 0;
9870
9871 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9872 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9873 res->size = 0; /* GROUP_SIZE */
9874 res->store_count = 0; /* GROUP_STORE_COUNT */
9875 res->gap = 0; /* GROUP_GAP */
9876 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
ebfd146a 9877
9878 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9879 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9880
9881 return res;
9882}
9883
9884
f8c0baaf 9885/* Set the current stmt_vec_info vector to V. */
9886
9887void
f8c0baaf 9888set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
ebfd146a 9889{
f8c0baaf 9890 stmt_vec_info_vec = v;
9891}
9892
f8c0baaf 9893/* Free the stmt_vec_info entries in V and release V. */
9894
9895void
f8c0baaf 9896free_stmt_vec_infos (vec<stmt_vec_info> *v)
ebfd146a 9897{
93675444 9898 unsigned int i;
3161455c 9899 stmt_vec_info info;
f8c0baaf 9900 FOR_EACH_VEC_ELT (*v, i, info)
dbe1b846 9901 if (info != NULL_STMT_VEC_INFO)
3161455c 9902 free_stmt_vec_info (STMT_VINFO_STMT (info));
9903 if (v == stmt_vec_info_vec)
9904 stmt_vec_info_vec = NULL;
9905 v->release ();
9906}
9907
9908
9909/* Free stmt vectorization related info. */
9910
9911void
355fe088 9912free_stmt_vec_info (gimple *stmt)
9913{
9914 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9915
9916 if (!stmt_info)
9917 return;
9918
9919 /* Check if this statement has a related "pattern stmt"
9920 (introduced by the vectorizer during the pattern recognition
9921 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9922 too. */
9923 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9924 {
9925 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9926 for (gimple_stmt_iterator si = gsi_start (seq);
9927 !gsi_end_p (si); gsi_next (&si))
9928 {
9929 gimple *seq_stmt = gsi_stmt (si);
9930 gimple_set_bb (seq_stmt, NULL);
9931 tree lhs = gimple_get_lhs (seq_stmt);
9932 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9933 release_ssa_name (lhs);
9934 free_stmt_vec_info (seq_stmt);
9935 }
9936 stmt_vec_info patt_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9937 if (patt_stmt_info)
78048b1c 9938 {
9939 gimple_set_bb (patt_stmt_info->stmt, NULL);
9940 tree lhs = gimple_get_lhs (patt_stmt_info->stmt);
e6f5c25d 9941 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9942 release_ssa_name (lhs);
10681ce8 9943 free_stmt_vec_info (patt_stmt_info);
9944 }
9945 }
9946
9771b263 9947 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9948 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9949 set_vinfo_for_stmt (stmt, NULL);
9950 free (stmt_info);
9951}
9952
9953
bb67d9c7 9954/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9955
bb67d9c7 9956 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9957 by the target. */
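/* For example, a 4-byte SCALAR_TYPE together with a 16-byte SIZE gives a
   four-element vector type, provided the target supports a vector mode of
   that size; a SIZE of zero means "use the target's preferred SIMD mode".  */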
9958
c803b2a9 9959tree
86e36728 9960get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9961{
c7d97b28 9962 tree orig_scalar_type = scalar_type;
3bd8f481 9963 scalar_mode inner_mode;
ef4bddc2 9964 machine_mode simd_mode;
86e36728 9965 poly_uint64 nunits;
9966 tree vectype;
9967
9968 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9969 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9970 return NULL_TREE;
9971
3bd8f481 9972 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9973
9974 /* For vector types of elements whose mode precision doesn't
9975 match their type's precision we use an element type of mode
9976 precision. The vectorization routines will have to make sure
9977 they support the proper result truncation/extension.
9978 We also make sure to build vector types with INTEGER_TYPE
9979 component type only. */
6d7971b8 9980 if (INTEGRAL_TYPE_P (scalar_type)
9981 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9982 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9983 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9984 TYPE_UNSIGNED (scalar_type));
6d7971b8 9985
9986 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9987 When the component mode passes the above test simply use a type
9988 corresponding to that mode. The theory is that any use that
9989 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9990 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9991 && !INTEGRAL_TYPE_P (scalar_type))
9992 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9993
9994 /* We can't build a vector type of elements with alignment bigger than
9995 their size. */
dfc2e2ac 9996 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9997 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9998 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 9999
10000 /* If we fell back to using the mode, fail if there was
10001 no scalar type for it. */
10002 if (scalar_type == NULL_TREE)
10003 return NULL_TREE;
10004
10005 /* If no size was supplied, use the mode the target prefers. Otherwise
10006 look up a vector mode of the specified size. */
86e36728 10007 if (known_eq (size, 0U))
bb67d9c7 10008 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10009 else if (!multiple_p (size, nbytes, &nunits)
10010 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 10011 return NULL_TREE;
4c8fd8ac 10012 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 10013 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 10014 return NULL_TREE;
10015
10016 vectype = build_vector_type (scalar_type, nunits);
10017
10018 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10019 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 10020 return NULL_TREE;
ebfd146a 10021
10022 /* Re-attach the address-space qualifier if we canonicalized the scalar
10023 type. */
10024 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10025 return build_qualified_type
10026 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10027
10028 return vectype;
10029}
10030
86e36728 10031poly_uint64 current_vector_size;
10032
10033/* Function get_vectype_for_scalar_type.
10034
10035 Returns the vector type corresponding to SCALAR_TYPE as supported
10036 by the target. */
10037
10038tree
10039get_vectype_for_scalar_type (tree scalar_type)
10040{
10041 tree vectype;
10042 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10043 current_vector_size);
10044 if (vectype
86e36728 10045 && known_eq (current_vector_size, 0U))
10046 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10047 return vectype;
10048}
10049
10050/* Function get_mask_type_for_scalar_type.
10051
10052 Returns the mask type corresponding to a result of comparison
10053 of vectors of specified SCALAR_TYPE as supported by target. */
10054
10055tree
10056get_mask_type_for_scalar_type (tree scalar_type)
10057{
10058 tree vectype = get_vectype_for_scalar_type (scalar_type);
10059
10060 if (!vectype)
10061 return NULL;
10062
10063 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10064 current_vector_size);
10065}
10066
10067/* Function get_same_sized_vectype
10068
10069 Returns a vector type corresponding to SCALAR_TYPE of size
10070 VECTOR_TYPE if supported by the target. */
10071
10072tree
bb67d9c7 10073get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10074{
2568d8a1 10075 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10076 return build_same_sized_truth_vector_type (vector_type);
10077
10078 return get_vectype_for_scalar_type_and_size
10079 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10080}
10081
10082/* Function vect_is_simple_use.
10083
10084 Input:
10085 VINFO - the vect info of the loop or basic block that is being vectorized.
10086 OPERAND - operand in the loop or bb.
10087 Output:
10088 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10089 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10090 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10091 the definition could be anywhere in the function
81c40241 10092 DT - the type of definition
10093
10094 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10095 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10096 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 10097 operands are those that are defined by a previous iteration of the loop (as
10098 is the case in reduction/induction computations).
10099 For basic blocks, supportable operands are constants and bb invariants.
10100 For now, operands defined outside the basic block are not supported. */
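/* For example, in a loop computing a[i] = b[i] + x, the loop-invariant X
   is a vect_external_def, while the value loaded from b[i] is a
   vect_internal_def, defined by the current loop iteration.  */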
10101
10102bool
894dd753 10103vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
fef96d8e 10104 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
b8698a0f 10105{
10106 if (def_stmt_info_out)
10107 *def_stmt_info_out = NULL;
10108 if (def_stmt_out)
10109 *def_stmt_out = NULL;
3fc356dc 10110 *dt = vect_unknown_def_type;
b8698a0f 10111
73fbfcad 10112 if (dump_enabled_p ())
ebfd146a 10113 {
10114 dump_printf_loc (MSG_NOTE, vect_location,
10115 "vect_is_simple_use: operand ");
10116 if (TREE_CODE (operand) == SSA_NAME
10117 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10118 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10119 else
10120 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 10121 }
b8698a0f 10122
b758f602 10123 if (CONSTANT_CLASS_P (operand))
10124 *dt = vect_constant_def;
10125 else if (is_gimple_min_invariant (operand))
10126 *dt = vect_external_def;
10127 else if (TREE_CODE (operand) != SSA_NAME)
10128 *dt = vect_unknown_def_type;
10129 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
8644a673 10130 *dt = vect_external_def;
10131 else
10132 {
30f502ed 10133 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10134 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10135 if (!stmt_vinfo)
10136 *dt = vect_external_def;
10137 else
0f8c840c 10138 {
10139 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10140 {
10141 stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10142 def_stmt = stmt_vinfo->stmt;
10143 }
10144 switch (gimple_code (def_stmt))
10145 {
10146 case GIMPLE_PHI:
10147 case GIMPLE_ASSIGN:
10148 case GIMPLE_CALL:
10149 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10150 break;
10151 default:
10152 *dt = vect_unknown_def_type;
10153 break;
10154 }
10155 if (def_stmt_info_out)
10156 *def_stmt_info_out = stmt_vinfo;
0f8c840c 10157 }
10158 if (def_stmt_out)
10159 *def_stmt_out = def_stmt;
10160 }
10161
10162 if (dump_enabled_p ())
10163 {
30f502ed 10164 dump_printf (MSG_NOTE, ", type of def: ");
10165 switch (*dt)
10166 {
10167 case vect_uninitialized_def:
10168 dump_printf (MSG_NOTE, "uninitialized\n");
10169 break;
10170 case vect_constant_def:
10171 dump_printf (MSG_NOTE, "constant\n");
10172 break;
10173 case vect_external_def:
10174 dump_printf (MSG_NOTE, "external\n");
10175 break;
10176 case vect_internal_def:
10177 dump_printf (MSG_NOTE, "internal\n");
10178 break;
10179 case vect_induction_def:
10180 dump_printf (MSG_NOTE, "induction\n");
10181 break;
10182 case vect_reduction_def:
10183 dump_printf (MSG_NOTE, "reduction\n");
10184 break;
10185 case vect_double_reduction_def:
10186 dump_printf (MSG_NOTE, "double reduction\n");
10187 break;
10188 case vect_nested_cycle:
10189 dump_printf (MSG_NOTE, "nested cycle\n");
10190 break;
10191 case vect_unknown_def_type:
10192 dump_printf (MSG_NOTE, "unknown\n");
10193 break;
10194 }
10195 }
10196
81c40241 10197 if (*dt == vect_unknown_def_type)
ebfd146a 10198 {
73fbfcad 10199 if (dump_enabled_p ())
78c60e3d 10200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10201 "Unsupported pattern.\n");
10202 return false;
10203 }
10204
10205 return true;
10206}
10207
81c40241 10208/* Function vect_is_simple_use.
b690cc0f 10209
81c40241 10210 Same as vect_is_simple_use but also determines the vector operand
10211 type of OPERAND and stores it to *VECTYPE. If the definition of
10212 OPERAND is vect_uninitialized_def, vect_constant_def or
10213 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
10214 is responsible for computing the best suited vector type for the
10215 scalar operand. */
10216
10217bool
894dd753 10218vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10219 tree *vectype, stmt_vec_info *def_stmt_info_out,
10220 gimple **def_stmt_out)
b690cc0f 10221{
fef96d8e 10222 stmt_vec_info def_stmt_info;
894dd753 10223 gimple *def_stmt;
fef96d8e 10224 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10225 return false;
10226
10227 if (def_stmt_out)
10228 *def_stmt_out = def_stmt;
10229 if (def_stmt_info_out)
10230 *def_stmt_info_out = def_stmt_info;
894dd753 10231
10232 /* Now get a vector type if the def is internal, otherwise supply
10233 NULL_TREE and leave it up to the caller to figure out a proper
10234 type for the use stmt. */
10235 if (*dt == vect_internal_def
10236 || *dt == vect_induction_def
10237 || *dt == vect_reduction_def
10238 || *dt == vect_double_reduction_def
10239 || *dt == vect_nested_cycle)
10240 {
fef96d8e 10241 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
b690cc0f 10242 gcc_assert (*vectype != NULL_TREE);
10243 if (dump_enabled_p ())
10244 {
10245 dump_printf_loc (MSG_NOTE, vect_location,
10246 "vect_is_simple_use: vectype ");
10247 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10248 dump_printf (MSG_NOTE, "\n");
10249 }
10250 }
10251 else if (*dt == vect_uninitialized_def
10252 || *dt == vect_constant_def
10253 || *dt == vect_external_def)
10254 *vectype = NULL_TREE;
10255 else
10256 gcc_unreachable ();
10257
10258 return true;
10259}
10260
10261
10262/* Function supportable_widening_operation
10263
10264 Check whether an operation represented by the code CODE is a
10265 widening operation that is supported by the target platform in
10266 vector form (i.e., when operating on arguments of type VECTYPE_IN
10267 producing a result of type VECTYPE_OUT).
b8698a0f 10268
10269 Widening operations we currently support are NOP (CONVERT), FLOAT,
10270 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10271 are supported by the target platform either directly (via vector
10272 tree-codes), or via target builtins.
10273
10274 Output:
10275 - CODE1 and CODE2 are codes of vector operations to be used when
10276 vectorizing the operation, if available.
10277 - MULTI_STEP_CVT determines the number of required intermediate steps in
10278 case of multi-step conversion (like char->short->int - in that case
10279 MULTI_STEP_CVT will be 1).
10280 - INTERM_TYPES contains the intermediate type required to perform the
10281 widening operation (short in the above example). */
10282
10283bool
355fe088 10284supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10285 tree vectype_out, tree vectype_in,
10286 enum tree_code *code1, enum tree_code *code2,
10287 int *multi_step_cvt,
9771b263 10288 vec<tree> *interm_types)
10289{
10290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10291 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10292 struct loop *vect_loop = NULL;
ef4bddc2 10293 machine_mode vec_mode;
81f40b79 10294 enum insn_code icode1, icode2;
ebfd146a 10295 optab optab1, optab2;
10296 tree vectype = vectype_in;
10297 tree wide_vectype = vectype_out;
ebfd146a 10298 enum tree_code c1, c2;
10299 int i;
10300 tree prev_type, intermediate_type;
ef4bddc2 10301 machine_mode intermediate_mode, prev_mode;
4a00c761 10302 optab optab3, optab4;
ebfd146a 10303
4a00c761 10304 *multi_step_cvt = 0;
10305 if (loop_info)
10306 vect_loop = LOOP_VINFO_LOOP (loop_info);
10307
10308 switch (code)
10309 {
10310 case WIDEN_MULT_EXPR:
10311 /* The result of a vectorized widening operation usually requires
10312 two vectors (because the widened results do not fit into one vector).
10313 The generated vector results would normally be expected to be
10314 generated in the same order as in the original scalar computation,
10315 i.e. if 8 results are generated in each vector iteration, they are
10316 to be organized as follows:
10317 vect1: [res1,res2,res3,res4],
10318 vect2: [res5,res6,res7,res8].
10319
10320 However, in the special case that the result of the widening
10321 operation is used in a reduction computation only, the order doesn't
10322 matter (because when vectorizing a reduction we change the order of
10323 the computation). Some targets can take advantage of this and
10324 generate more efficient code. For example, targets like Altivec,
10325 that support widen_mult using a sequence of {mult_even,mult_odd}
10326 generate the following vectors:
10327 vect1: [res1,res3,res5,res7],
10328 vect2: [res2,res4,res6,res8].
10329
10330 When vectorizing outer-loops, we execute the inner-loop sequentially
10331 (each vectorized inner-loop iteration contributes to VF outer-loop
10332 iterations in parallel). We therefore don't allow changing the
10333 order of the computation in the inner-loop during outer-loop
10334 vectorization. */
10335 /* TODO: Another case in which order doesn't *really* matter is when we
10336 widen and then contract again, e.g. (short)((int)x * y >> 8).
10337 Normally, pack_trunc performs an even/odd permute, whereas the
10338 repack from an even/odd expansion would be an interleave, which
10339 would be significantly simpler for e.g. AVX2. */
10340 /* In any case, in order to avoid duplicating the code below, recurse
10341 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10342 are properly set up for the caller. If we fail, we'll continue with
10343 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10344 if (vect_loop
10345 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10346 && !nested_in_vect_loop_p (vect_loop, stmt)
10347 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10348 stmt, vectype_out, vectype_in,
10349 code1, code2, multi_step_cvt,
10350 interm_types))
10351 {
10352 /* Elements in a vector with vect_used_by_reduction property cannot
10353 be reordered if the use chain with this property does not have the
10354 same operation. One such example is s += a * b, where elements
10355 in a and b cannot be reordered. Here we check if the vector defined
10356 by STMT is only directly used in the reduction statement. */
10357 tree lhs = gimple_assign_lhs (stmt);
10358 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10359 if (use_stmt_info
10360 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10361 return true;
ebc047a2 10362 }
10363 c1 = VEC_WIDEN_MULT_LO_EXPR;
10364 c2 = VEC_WIDEN_MULT_HI_EXPR;
10365 break;
10366
10367 case DOT_PROD_EXPR:
10368 c1 = DOT_PROD_EXPR;
10369 c2 = DOT_PROD_EXPR;
10370 break;
10371
10372 case SAD_EXPR:
10373 c1 = SAD_EXPR;
10374 c2 = SAD_EXPR;
10375 break;
10376
10377 case VEC_WIDEN_MULT_EVEN_EXPR:
10378 /* Support the recursion induced just above. */
10379 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10380 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10381 break;
10382
36ba4aae 10383 case WIDEN_LSHIFT_EXPR:
10384 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10385 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10386 break;
10387
ebfd146a 10388 CASE_CONVERT:
10389 c1 = VEC_UNPACK_LO_EXPR;
10390 c2 = VEC_UNPACK_HI_EXPR;
10391 break;
10392
10393 case FLOAT_EXPR:
10394 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10395 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10396 break;
10397
10398 case FIX_TRUNC_EXPR:
10399 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10400 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10401 break;
10402
10403 default:
10404 gcc_unreachable ();
10405 }
10406
6ae6116f 10407 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10408 std::swap (c1, c2);
4a00c761 10409
10410 if (code == FIX_TRUNC_EXPR)
10411 {
10412 /* The signedness is determined from output operand. */
10413 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10414 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10415 }
10416 else
10417 {
10418 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10419 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10420 }
10421
10422 if (!optab1 || !optab2)
10423 return false;
10424
10425 vec_mode = TYPE_MODE (vectype);
10426 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10427 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10428 return false;
10429
10430 *code1 = c1;
10431 *code2 = c2;
10432
10433 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10434 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10435 /* For scalar masks we may have different boolean
10436 vector types having the same QImode. Thus we
10437 add additional check for elements number. */
10438 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10439 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10440 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10441
b8698a0f 10442 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10443 types. */
ebfd146a 10444
10445 prev_type = vectype;
10446 prev_mode = vec_mode;
b8698a0f 10447
10448 if (!CONVERT_EXPR_CODE_P (code))
10449 return false;
b8698a0f 10450
10451 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10452 intermediate steps in promotion sequence. We try
10453 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10454 not. */
9771b263 10455 interm_types->create (MAX_INTERM_CVT_STEPS);
10456 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10457 {
10458 intermediate_mode = insn_data[icode1].operand[0].mode;
10459 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10460 {
7cfb4d93 10461 intermediate_type = vect_halve_mask_nunits (prev_type);
10462 if (intermediate_mode != TYPE_MODE (intermediate_type))
10463 return false;
10464 }
10465 else
10466 intermediate_type
10467 = lang_hooks.types.type_for_mode (intermediate_mode,
10468 TYPE_UNSIGNED (prev_type));
10469
10470 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10471 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10472
10473 if (!optab3 || !optab4
10474 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10475 || insn_data[icode1].operand[0].mode != intermediate_mode
10476 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10477 || insn_data[icode2].operand[0].mode != intermediate_mode
10478 || ((icode1 = optab_handler (optab3, intermediate_mode))
10479 == CODE_FOR_nothing)
10480 || ((icode2 = optab_handler (optab4, intermediate_mode))
10481 == CODE_FOR_nothing))
10482 break;
ebfd146a 10483
9771b263 10484 interm_types->quick_push (intermediate_type);
10485 (*multi_step_cvt)++;
10486
10487 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10488 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10489 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10490 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10491 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10492
10493 prev_type = intermediate_type;
10494 prev_mode = intermediate_mode;
10495 }
10496
9771b263 10497 interm_types->release ();
4a00c761 10498 return false;
10499}
10500
10501
10502/* Function supportable_narrowing_operation
10503
10504 Check whether an operation represented by the code CODE is a
10505 narrowing operation that is supported by the target platform in
10506 vector form (i.e., when operating on arguments of type VECTYPE_IN
10507 and producing a result of type VECTYPE_OUT).
b8698a0f 10508
10509 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10510 and FLOAT. This function checks if these operations are supported by
10511 the target platform directly via vector tree-codes.
10512
10513 Output:
10514 - CODE1 is the code of a vector operation to be used when
10515 vectorizing the operation, if available.
10516 - MULTI_STEP_CVT determines the number of required intermediate steps in
10517 case of multi-step conversion (like int->short->char - in that case
10518 MULTI_STEP_CVT will be 1).
10519 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10520 narrowing operation (short in the above example). */
10521
10522bool
10523supportable_narrowing_operation (enum tree_code code,
b690cc0f 10524 tree vectype_out, tree vectype_in,
ebfd146a 10525 enum tree_code *code1, int *multi_step_cvt,
9771b263 10526 vec<tree> *interm_types)
ebfd146a 10527{
ef4bddc2 10528 machine_mode vec_mode;
10529 enum insn_code icode1;
10530 optab optab1, interm_optab;
10531 tree vectype = vectype_in;
10532 tree narrow_vectype = vectype_out;
ebfd146a 10533 enum tree_code c1;
3ae0661a 10534 tree intermediate_type, prev_type;
ef4bddc2 10535 machine_mode intermediate_mode, prev_mode;
ebfd146a 10536 int i;
4a00c761 10537 bool uns;
ebfd146a 10538
4a00c761 10539 *multi_step_cvt = 0;
10540 switch (code)
10541 {
10542 CASE_CONVERT:
10543 c1 = VEC_PACK_TRUNC_EXPR;
10544 break;
10545
10546 case FIX_TRUNC_EXPR:
10547 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10548 break;
10549
10550 case FLOAT_EXPR:
10551 c1 = VEC_PACK_FLOAT_EXPR;
10552 break;
10553
10554 default:
10555 gcc_unreachable ();
10556 }
10557
10558 if (code == FIX_TRUNC_EXPR)
10559 /* The signedness is determined from output operand. */
b690cc0f 10560 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10561 else
10562 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10563
10564 if (!optab1)
10565 return false;
10566
10567 vec_mode = TYPE_MODE (vectype);
947131ba 10568 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10569 return false;
10570
10571 *code1 = c1;
10572
10573 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10574 /* For scalar masks we may have different boolean
10575 vector types having the same QImode. Thus we
10576 add additional check for elements number. */
10577 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10578 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10579 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10580
10581 if (code == FLOAT_EXPR)
10582 return false;
10583
10584 /* Check if it's a multi-step conversion that can be done using intermediate
10585 types. */
4a00c761 10586 prev_mode = vec_mode;
3ae0661a 10587 prev_type = vectype;
10588 if (code == FIX_TRUNC_EXPR)
10589 uns = TYPE_UNSIGNED (vectype_out);
10590 else
10591 uns = TYPE_UNSIGNED (vectype);
10592
10593 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10594 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10595 costly than signed. */
10596 if (code == FIX_TRUNC_EXPR && uns)
10597 {
10598 enum insn_code icode2;
10599
10600 intermediate_type
10601 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10602 interm_optab
10603 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10604 if (interm_optab != unknown_optab
10605 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10606 && insn_data[icode1].operand[0].mode
10607 == insn_data[icode2].operand[0].mode)
10608 {
10609 uns = false;
10610 optab1 = interm_optab;
10611 icode1 = icode2;
10612 }
10613 }
ebfd146a 10614
10615 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10616 intermediate steps in promotion sequence. We try
10617 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10618 interm_types->create (MAX_INTERM_CVT_STEPS);
10619 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10620 {
10621 intermediate_mode = insn_data[icode1].operand[0].mode;
10622 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10623 {
7cfb4d93 10624 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10625 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10626 return false;
10627 }
10628 else
10629 intermediate_type
10630 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10631 interm_optab
10632 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10633 optab_default);
10634 if (!interm_optab
10635 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10636 || insn_data[icode1].operand[0].mode != intermediate_mode
10637 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10638 == CODE_FOR_nothing))
10639 break;
10640
9771b263 10641 interm_types->quick_push (intermediate_type);
10642 (*multi_step_cvt)++;
10643
10644 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10645 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10646 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10647 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10648
10649 prev_mode = intermediate_mode;
3ae0661a 10650 prev_type = intermediate_type;
4a00c761 10651 optab1 = interm_optab;
10652 }
10653
9771b263 10654 interm_types->release ();
4a00c761 10655 return false;
ebfd146a 10656}
10657
10658/* Generate and return a statement that sets vector mask MASK such that
10659 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
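/* For example, with START_INDEX 6 and END_INDEX 9 the first three lanes of
   MASK are set and all remaining lanes are clear.  */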
10660
10661gcall *
10662vect_gen_while (tree mask, tree start_index, tree end_index)
10663{
10664 tree cmp_type = TREE_TYPE (start_index);
10665 tree mask_type = TREE_TYPE (mask);
10666 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10667 cmp_type, mask_type,
10668 OPTIMIZE_FOR_SPEED));
10669 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10670 start_index, end_index,
10671 build_zero_cst (mask_type));
10672 gimple_call_set_lhs (call, mask);
10673 return call;
10674}
10675
10676/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10677 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10678
10679tree
10680vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10681 tree end_index)
10682{
10683 tree tmp = make_ssa_name (mask_type);
10684 gcall *call = vect_gen_while (tmp, start_index, end_index);
10685 gimple_seq_add_stmt (seq, call);
10686 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10687}
10688
10689/* Try to compute the vector types required to vectorize STMT_INFO,
10690 returning true on success and false if vectorization isn't possible.
10691
10692 On success:
10693
10694 - Set *STMT_VECTYPE_OUT to:
10695 - NULL_TREE if the statement doesn't need to be vectorized;
10696 - boolean_type_node if the statement is a boolean operation whose
10697 vector type can only be determined once all the other vector types
10698 are known; and
10699 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10700
10701 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10702 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10703 statement does not help to determine the overall number of units. */
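/* For example, for a statement that widens a short to an int, the statement
   vectype is based on the int result while *NUNITS_VECTYPE_OUT is based on
   the narrower short input, since that is what determines how many scalar
   elements are processed per vector.  */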
10704
10705bool
10706vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10707 tree *stmt_vectype_out,
10708 tree *nunits_vectype_out)
10709{
10710 gimple *stmt = stmt_info->stmt;
10711
10712 *stmt_vectype_out = NULL_TREE;
10713 *nunits_vectype_out = NULL_TREE;
10714
10715 if (gimple_get_lhs (stmt) == NULL_TREE
10716 /* MASK_STORE has no lhs, but is ok. */
10717 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10718 {
10719 if (is_a <gcall *> (stmt))
10720 {
10721 /* Ignore calls with no lhs. These must be calls to
10722 #pragma omp simd functions, and what vectorization factor
10723 it really needs can't be determined until
10724 vectorizable_simd_clone_call. */
10725 if (dump_enabled_p ())
10726 dump_printf_loc (MSG_NOTE, vect_location,
10727 "defer to SIMD clone analysis.\n");
10728 return true;
10729 }
10730
10731 if (dump_enabled_p ())
10732 {
10733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10734 "not vectorized: irregular stmt.");
10735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10736 }
10737 return false;
10738 }
10739
10740 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10741 {
10742 if (dump_enabled_p ())
10743 {
10744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10745 "not vectorized: vector stmt in loop:");
10746 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10747 }
10748 return false;
10749 }
10750
10751 tree vectype;
10752 tree scalar_type = NULL_TREE;
10753 if (STMT_VINFO_VECTYPE (stmt_info))
10754 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10755 else
10756 {
10757 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10758 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10759 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10760 else
10761 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10762
10763 /* Pure bool ops don't participate in number-of-units computation.
10764 For comparisons use the types being compared. */
10765 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10766 && is_gimple_assign (stmt)
10767 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10768 {
10769 *stmt_vectype_out = boolean_type_node;
10770
10771 tree rhs1 = gimple_assign_rhs1 (stmt);
10772 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10773 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10774 scalar_type = TREE_TYPE (rhs1);
10775 else
10776 {
10777 if (dump_enabled_p ())
10778 dump_printf_loc (MSG_NOTE, vect_location,
10779 "pure bool operation.\n");
10780 return true;
10781 }
10782 }
10783
10784 if (dump_enabled_p ())
10785 {
10786 dump_printf_loc (MSG_NOTE, vect_location,
10787 "get vectype for scalar type: ");
10788 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10789 dump_printf (MSG_NOTE, "\n");
10790 }
10791 vectype = get_vectype_for_scalar_type (scalar_type);
10792 if (!vectype)
10793 {
10794 if (dump_enabled_p ())
10795 {
10796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10797 "not vectorized: unsupported data-type ");
10798 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10799 scalar_type);
10800 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10801 }
10802 return false;
10803 }
10804
10805 if (!*stmt_vectype_out)
10806 *stmt_vectype_out = vectype;
10807
10808 if (dump_enabled_p ())
10809 {
10810 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10811 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10812 dump_printf (MSG_NOTE, "\n");
10813 }
10814 }
10815
10816 /* Don't try to compute scalar types if the stmt produces a boolean
10817 vector; use the existing vector type instead. */
10818 tree nunits_vectype;
10819 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10820 nunits_vectype = vectype;
10821 else
10822 {
10823 /* The number of units is set according to the smallest scalar
10824 type (or the largest vector size, but we only support one
10825 vector size per vectorization). */
10826 if (*stmt_vectype_out != boolean_type_node)
10827 {
10828 HOST_WIDE_INT dummy;
10829 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10830 }
10831 if (dump_enabled_p ())
10832 {
10833 dump_printf_loc (MSG_NOTE, vect_location,
10834 "get vectype for scalar type: ");
10835 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10836 dump_printf (MSG_NOTE, "\n");
10837 }
10838 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10839 }
10840 if (!nunits_vectype)
10841 {
10842 if (dump_enabled_p ())
10843 {
10844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10845 "not vectorized: unsupported data-type ");
10846 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10847 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10848 }
10849 return false;
10850 }
10851
10852 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10853 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10854 {
10855 if (dump_enabled_p ())
10856 {
10857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10858 "not vectorized: different sized vector "
10859 "types in statement, ");
10860 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10861 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10862 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10863 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10864 }
10865 return false;
10866 }
10867
10868 if (dump_enabled_p ())
10869 {
10870 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10871 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10872 dump_printf (MSG_NOTE, "\n");
10873
10874 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10875 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10876 dump_printf (MSG_NOTE, "\n");
10877 }
10878
10879 *nunits_vectype_out = nunits_vectype;
10880 return true;
10881}
10882
10883/* Try to determine the correct vector type for STMT_INFO, which is a
10884 statement that produces a scalar boolean result. Return the vector
10885 type on success, otherwise return NULL_TREE. */
10886
10887tree
10888vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10889{
10890 gimple *stmt = stmt_info->stmt;
10891 tree mask_type = NULL;
10892 tree vectype, scalar_type;
10893
10894 if (is_gimple_assign (stmt)
10895 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10896 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10897 {
10898 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10899 mask_type = get_mask_type_for_scalar_type (scalar_type);
10900
10901 if (!mask_type)
10902 {
10903 if (dump_enabled_p ())
10904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10905 "not vectorized: unsupported mask\n");
10906 return NULL_TREE;
10907 }
10908 }
10909 else
10910 {
10911 tree rhs;
10912 ssa_op_iter iter;
10913 enum vect_def_type dt;
10914
10915 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10916 {
894dd753 10917 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10918 {
10919 if (dump_enabled_p ())
10920 {
10921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10922 "not vectorized: can't compute mask type "
10923 "for statement, ");
10924 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10925 0);
10926 }
10927 return NULL_TREE;
10928 }
10929
 10930 /* No vectype probably means an external definition.
 10931 Allow it in case another operand allows us to
 10932 determine the mask type. */
10933 if (!vectype)
10934 continue;
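 /* For example (illustrative only), in
	_1 = a_2 < limit_3;
    where LIMIT_3 is defined outside the loop being vectorized, LIMIT_3
    has no vectype here, but A_2's vectype can still determine the mask
    type.  */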
10935
10936 if (!mask_type)
10937 mask_type = vectype;
10938 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10939 TYPE_VECTOR_SUBPARTS (vectype)))
10940 {
10941 if (dump_enabled_p ())
10942 {
10943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10944 "not vectorized: different sized masks "
10945 "types in statement, ");
10946 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10947 mask_type);
10948 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10949 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10950 vectype);
10951 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10952 }
10953 return NULL_TREE;
10954 }
10955 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10956 != VECTOR_BOOLEAN_TYPE_P (vectype))
10957 {
10958 if (dump_enabled_p ())
10959 {
10960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10961 "not vectorized: mixed mask and "
10962 "nonmask vector types in statement, ");
10963 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10964 mask_type);
10965 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10966 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10967 vectype);
10968 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10969 }
10970 return NULL_TREE;
10971 }
10972 }
10973
 10974 /* We may compare boolean values loaded as a vector of integers.
 10975 Fix mask_type in such cases. */
10976 if (mask_type
10977 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10978 && gimple_code (stmt) == GIMPLE_ASSIGN
10979 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10980 mask_type = build_same_sized_truth_vector_type (mask_type);
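 /* Hypothetical illustration: if B_2 in the statement below were a _Bool
    loaded from memory, its vectype would be an integer data vector, so for
	_1 = b_2 != 0;
    MASK_TYPE would start out as that integer vector type and be switched
    here to the same-sized boolean vector type.  */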
10981 }
10982
 10983 /* No mask_type should mean a loop-invariant predicate (every operand
 10984 lacked a vectype above). This is probably a subject for optimization in if-conversion. */
10985 if (!mask_type && dump_enabled_p ())
10986 {
10987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10988 "not vectorized: can't compute mask type "
10989 "for statement, ");
10990 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10991 }
10992 return mask_type;
10993}