/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign };
  body_cost_vec->safe_push (si);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}
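
/* Illustrative note (added for exposition; not part of the upstream
   sources): a caller that wants to account for, say, two unaligned vector
   loads in the loop body with a byte misalignment of 4 could record them
   roughly as

       unsigned estimate
	 = record_stmt_cost (cost_vec, 2, unaligned_load,
			     stmt_info, 4, vect_body);

   where COST_VEC and STMT_INFO stand for whatever cost vector and stmt the
   caller is currently working on.  The entries pushed onto the vector are
   replayed to the target cost model later; the return value is only a
   preliminary estimate.  */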

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = stmt_info->stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
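
/* Illustrative example (added for exposition; not part of the upstream
   sources): in a loop such as

       for (i = 0; i < n; i++)
	 {
	   a[i] = b[i] + 1;   <-- has a vdef, so marked vect_used_in_scope
	   last = b[i];       <-- value used after the loop, so live
	 }
       ... = last;

   the store to a[i] is relevant because it alters memory, while the
   assignment to LAST is only live: its value escapes through the loop exit
   PHI, and since its uses are not all invariant it ends up marked
   vect_used_only_live by the code above.  */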


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (stmt, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (stmt, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (stmt, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
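
/* Illustrative note (added for exposition; not part of the upstream
   sources): for an internal-function store along the lines of

       .MASK_STORE (addr_ptr, align, mask_vec, value)

   the mask and the stored value are real (non-indexing) uses recognized by
   the internal_fn_* checks above, whereas a USE that only appears in the
   address computation feeding ADDR_PTR leaves this function returning
   false, so its def-stmt is not dragged into vectorization.  */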


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!dstmt_vinfo)
    return true;

  def_bb = gimple_bb (dstmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     DSTMT_VINFO must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
     check that everything is as expected, and we are done.  */
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
		      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
			 == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

/* Compute the prologue cost for invariant or constant operands.  */

static unsigned
vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
			       unsigned opno, enum vect_def_type dt,
			       stmt_vector_for_cost *cost_vec)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
  tree op = gimple_op (stmt, opno);
  unsigned prologue_cost = 0;

  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
  unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
      /* ??? We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
				 opno))
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  /* ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  prologue_cost += record_stmt_cost
	      (cost_vec, 1,
	       dt == vect_external_def
	       ? (elt ? scalar_to_vec : vec_construct)
	       : vector_load,
	       stmt_info, 0, vect_prologue);
	  nelt = 0;
	}
    }

  return prologue_cost;
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (node)
    {
      /* Scan operands and account for prologue cost of constants/externals.
	 ??? This over-estimates cost for multiple uses and should be
	 re-engineered.  */
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
      tree lhs = gimple_get_lhs (stmt);
      for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
	{
	  tree op = gimple_op (stmt, i);
	  enum vect_def_type dt;
	  if (!op || op == lhs)
	    continue;
	  if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
	      && (dt == vect_constant_def || dt == vect_external_def))
	    prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
							    i, dt, cost_vec);
	}
    }
  else
    /* Cost the "broadcast" of a scalar operand in to a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Adjust for two-operator SLP nodes.  */
  if (node && SLP_TREE_TWO_OPERATORS (node))
    {
      ncopies *= 2;
      inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
				       stmt_info, 0, vect_body);
    }

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr,
				    stmt_vector_for_cost *cost_vec)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
				       vec_promote_demote, stmt_info, 0,
				       vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
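
/* Worked example (added for exposition; not part of the upstream sources):
   a two-step promotion, e.g. widening char elements all the way to int,
   has PWR == 1.  For a promotion the loop above then records
   vect_pow2 (1) + vect_pow2 (2) = 2 + 4 vec_promote_demote stmts, because
   each extra widening step doubles the number of vector stmts needed; the
   corresponding two-step demotion records vect_pow2 (0) + vect_pow2 (1)
   = 1 + 2 stmts.  */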

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

static void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       enum vect_def_type dt,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (slp_node)
	prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
							1, dt, cost_vec);
      else
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses a high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
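
/* Worked example (added for exposition; not part of the upstream sources):
   for an interleaved store group of GROUP_SIZE == 4 handled by
   VMAT_CONTIGUOUS_PERMUTE with NCOPIES == 1, the formula above gives
   nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts, i.e. two rounds of
   high/low interleaving over the four vectors, on top of the stores
   themselves.  */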


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_instance instance,
		      slp_tree slp_node,
		      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
	 the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms;
      unsigned assumed_nunits
	= vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
      unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
      vect_transform_slp_perm_load (slp_node, vNULL, NULL,
				    slp_vf, instance, true,
				    &n_perms);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
				       first_stmt_info, 0, vect_body);
      /* And adjust the number of loads performed.  This handles
	 redundancies as well as loads that are later dead.  */
      auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
      bitmap_clear (perm);
      for (unsigned i = 0;
	   i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
	bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
      ncopies = 0;
      bool load_seen = false;
      for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
	{
	  if (i % assumed_nunits == 0)
	    {
	      if (load_seen)
		ncopies++;
	      load_seen = false;
	    }
	  if (bitmap_bit_p (perm, i))
	    load_seen = true;
	}
      if (load_seen)
	ncopies++;
      gcc_assert (ncopies
		  <= (DR_GROUP_SIZE (first_stmt_info)
		      - DR_GROUP_GAP (first_stmt_info)
		      + assumed_nunits - 1) / assumed_nunits);
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses an even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
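
/* Worked example (added for exposition; not part of the upstream sources):
   with DR_GROUP_SIZE == 8, ASSUMED_NUNITS == 4 and an SLP load permutation
   that only ever selects elements 0..3, the PERM bitmap has no bits set in
   its second half, so the counting loop above re-computes ncopies as 1:
   the vector load that would cover elements 4..7 is dead and is not
   costed.  */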


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
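
/* Illustrative usage (added for exposition; not part of the upstream
   sources): to materialize a loop-invariant scalar X as a vector operand
   for STMT one would call something like

       tree vec_cst = vect_init_vector (stmt, x, vectype, NULL);

   Passing a NULL GSI makes vect_init_vector_1 place the generated init_stmt
   on the loop preheader edge, so the splat is computed once before the loop
   rather than in every iteration.  */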

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  stmt_vec_info vec_stmt_info;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt_info
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt_info = (STMT_VINFO_VEC_STMT
			   (STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt_info);
	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
	  vec_oprnd = PHI_RESULT (phi);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
	  vec_oprnd = PHI_RESULT (phi);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
1522
1523
c83a894c
AH
1524/* Function vect_get_vec_def_for_operand.
1525
1526 OP is an operand in STMT. This function returns a (vector) def that will be
1527 used in the vectorized stmt for STMT.
1528
1529 In the case that OP is an SSA_NAME which is defined in the loop, then
1530 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1531
1532 In case OP is an invariant or constant, a new stmt that creates a vector def
1533 needs to be introduced. VECTYPE may be used to specify a required type for
1534 the vector invariant. */
1535
1536tree
1537vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1538{
1539 gimple *def_stmt;
1540 enum vect_def_type dt;
1541 bool is_simple_use;
1542 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1543 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1544
1545 if (dump_enabled_p ())
1546 {
1547 dump_printf_loc (MSG_NOTE, vect_location,
1548 "vect_get_vec_def_for_operand: ");
1549 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1550 dump_printf (MSG_NOTE, "\n");
1551 }
1552
fef96d8e
RS
1553 stmt_vec_info def_stmt_info;
1554 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1555 &def_stmt_info, &def_stmt);
c83a894c
AH
1556 gcc_assert (is_simple_use);
1557 if (def_stmt && dump_enabled_p ())
1558 {
1559 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1560 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1561 }
1562
1563 if (dt == vect_constant_def || dt == vect_external_def)
1564 {
1565 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1566 tree vector_type;
1567
1568 if (vectype)
1569 vector_type = vectype;
2568d8a1 1570 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1571 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1572 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1573 else
1574 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1575
1576 gcc_assert (vector_type);
1577 return vect_init_vector (stmt, op, vector_type, NULL);
1578 }
1579 else
fef96d8e 1580 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
c83a894c
AH
1581}
1582
1583
ebfd146a
IR
1584/* Function vect_get_vec_def_for_stmt_copy
1585
ff802fa1 1586 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1587 vectorized stmt to be created (by the caller to this function) is a "copy"
1588 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1589 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1590 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1591 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1592 DT is the type of the vector def VEC_OPRND.
1593
1594 Context:
1595 In case the vectorization factor (VF) is bigger than the number
1596 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1597 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1598 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1599 smallest data-type determines the VF, and as a result, when vectorizing
1600 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1601 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1602 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1603 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1604 which VF=16 and nunits=4, so the number of copies required is 4):
1605
1606 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1607
ebfd146a
IR
1608 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1609 VS1.1: vx.1 = memref1 VS1.2
1610 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1611 VS1.3: vx.3 = memref3
ebfd146a
IR
1612
1613 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1614 VSnew.1: vz1 = vx.1 + ... VSnew.2
1615 VSnew.2: vz2 = vx.2 + ... VSnew.3
1616 VSnew.3: vz3 = vx.3 + ...
1617
1618 The vectorization of S1 is explained in vectorizable_load.
1619 The vectorization of S2:
b8698a0f
L
1620 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1621 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1622 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1623 returns the vector-def 'vx.0'.
1624
b8698a0f
L
1625 To create the remaining copies of the vector-stmt (VSnew.j), this
1626 function is called to get the relevant vector-def for each operand. It is
1627 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1628 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1629
b8698a0f
L
1630 For example, to obtain the vector-def 'vx.1' in order to create the
1631 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1632 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1633 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1634 and return its def ('vx.1').
1635 Overall, to create the above sequence this function will be called 3 times:
1636 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1637 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1638 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1639
1640tree
1641vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1642{
355fe088 1643 gimple *vec_stmt_for_operand;
ebfd146a
IR
1644 stmt_vec_info def_stmt_info;
1645
1646 /* Do nothing; can reuse same def. */
8644a673 1647 if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1648 return vec_oprnd;
1649
1650 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1651 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1652 gcc_assert (def_stmt_info);
1653 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1654 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1655 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1656 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1657 else
1658 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1659 return vec_oprnd;
1660}
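
A minimal standalone C++ sketch of the copy chaining described above: with VF=16 and nunits=4 there are VF/nunits = 4 copies, and each later copy's operand is found by following a RELATED_STMT-style link from the previous one. The struct and names below are hypothetical stand-ins, not GCC internals.

#include <cstdio>

struct vec_stmt { const char *def; vec_stmt *related; };

int main ()
{
  unsigned vf = 16, nunits = 4;
  unsigned ncopies = vf / nunits;               /* 4 copies: vx.0 .. vx.3.  */

  vec_stmt vs[4] = { { "vx.0", nullptr }, { "vx.1", nullptr },
                     { "vx.2", nullptr }, { "vx.3", nullptr } };
  for (int i = 0; i < 3; ++i)
    vs[i].related = &vs[i + 1];                 /* like STMT_VINFO_RELATED_STMT.  */

  /* Emulates vect_get_vec_def_for_stmt_copy: given vx.j, return vx.(j+1).  */
  vec_stmt *cur = &vs[0];
  for (unsigned j = 1; j < ncopies; ++j)
    {
      cur = cur->related;
      printf ("copy %u uses %s\n", j, cur->def);
    }
  return 0;
}
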
1661
1662
1663/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1664 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1665
c78e3652 1666void
b8698a0f 1667vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1668 vec<tree> *vec_oprnds0,
1669 vec<tree> *vec_oprnds1)
ebfd146a 1670{
9771b263 1671 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1672
1673 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1674 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1675
9771b263 1676 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1677 {
9771b263 1678 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1679 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1680 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1681 }
1682}
1683
1684
c78e3652 1685/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1686
c78e3652 1687void
355fe088 1688vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1689 vec<tree> *vec_oprnds0,
1690 vec<tree> *vec_oprnds1,
306b0c92 1691 slp_tree slp_node)
ebfd146a
IR
1692{
1693 if (slp_node)
d092494c
IR
1694 {
1695 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1696 auto_vec<tree> ops (nops);
1697 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1698
9771b263 1699 ops.quick_push (op0);
d092494c 1700 if (op1)
9771b263 1701 ops.quick_push (op1);
d092494c 1702
306b0c92 1703 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1704
37b5ec8f 1705 *vec_oprnds0 = vec_defs[0];
d092494c 1706 if (op1)
37b5ec8f 1707 *vec_oprnds1 = vec_defs[1];
d092494c 1708 }
ebfd146a
IR
1709 else
1710 {
1711 tree vec_oprnd;
1712
9771b263 1713 vec_oprnds0->create (1);
81c40241 1714 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1715 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1716
1717 if (op1)
1718 {
9771b263 1719 vec_oprnds1->create (1);
81c40241 1720 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1721 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1722 }
1723 }
1724}
1725
bb6c2b68
RS
1726/* Helper function called by vect_finish_replace_stmt and
1727 vect_finish_stmt_generation. Set the location of the new
e1bd7296 1728 statement and create and return a stmt_vec_info for it. */
bb6c2b68 1729
e1bd7296 1730static stmt_vec_info
bb6c2b68
RS
1731vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1732{
1733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1734 vec_info *vinfo = stmt_info->vinfo;
1735
e1bd7296 1736 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
bb6c2b68
RS
1737
1738 if (dump_enabled_p ())
1739 {
1740 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1741 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1742 }
1743
1744 gimple_set_location (vec_stmt, gimple_location (stmt));
1745
1746 /* While EH edges will generally prevent vectorization, stmt might
1747 e.g. be in a must-not-throw region. Ensure newly created stmts
1748 that could throw are part of the same region. */
1749 int lp_nr = lookup_stmt_eh_lp (stmt);
1750 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1751 add_stmt_to_eh_lp (vec_stmt, lp_nr);
e1bd7296
RS
1752
1753 return vec_stmt_info;
bb6c2b68
RS
1754}
1755
1756/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
e1bd7296
RS
1757 which sets the same scalar result as STMT did. Create and return a
1758 stmt_vec_info for VEC_STMT. */
bb6c2b68 1759
e1bd7296 1760stmt_vec_info
bb6c2b68
RS
1761vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1762{
1763 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1764
1765 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1766 gsi_replace (&gsi, vec_stmt, false);
1767
e1bd7296 1768 return vect_finish_stmt_generation_1 (stmt, vec_stmt);
bb6c2b68 1769}
ebfd146a 1770
e1bd7296
RS
1771/* Add VEC_STMT to the vectorized implementation of STMT and insert it
1772 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
ebfd146a 1773
e1bd7296 1774stmt_vec_info
355fe088 1775vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1776 gimple_stmt_iterator *gsi)
1777{
ebfd146a
IR
1778 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1779
54e8e2c3
RG
1780 if (!gsi_end_p (*gsi)
1781 && gimple_has_mem_ops (vec_stmt))
1782 {
355fe088 1783 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1784 tree vuse = gimple_vuse (at_stmt);
1785 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1786 {
1787 tree vdef = gimple_vdef (at_stmt);
1788 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1789 /* If we have an SSA vuse and insert a store, update virtual
1790 SSA form to avoid triggering the renamer. Do so only
1791 if we can easily see all uses - which is what almost always
1792 happens with the way vectorized stmts are inserted. */
1793 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1794 && ((is_gimple_assign (vec_stmt)
1795 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1796 || (is_gimple_call (vec_stmt)
1797 && !(gimple_call_flags (vec_stmt)
1798 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1799 {
1800 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1801 gimple_set_vdef (vec_stmt, new_vdef);
1802 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1803 }
1804 }
1805 }
ebfd146a 1806 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
e1bd7296 1807 return vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1808}
1809
70439f0d
RS
1810/* We want to vectorize a call to combined function CFN with function
1811 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1812 as the types of all inputs. Check whether this is possible using
1813 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1814
70439f0d
RS
1815static internal_fn
1816vectorizable_internal_function (combined_fn cfn, tree fndecl,
1817 tree vectype_out, tree vectype_in)
ebfd146a 1818{
70439f0d
RS
1819 internal_fn ifn;
1820 if (internal_fn_p (cfn))
1821 ifn = as_internal_fn (cfn);
1822 else
1823 ifn = associated_internal_fn (fndecl);
1824 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1825 {
1826 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1827 if (info.vectorizable)
1828 {
1829 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1830 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1831 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1832 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1833 return ifn;
1834 }
1835 }
1836 return IFN_LAST;
ebfd146a
IR
1837}
1838
5ce9450f 1839
355fe088 1840static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1841 gimple_stmt_iterator *);
1842
7cfb4d93
RS
1843/* Check whether a load or store statement in the loop described by
1844 LOOP_VINFO is possible in a fully-masked loop. This is testing
1845 whether the vectorizer pass has the appropriate support, as well as
1846 whether the target does.
1847
1848 VLS_TYPE says whether the statement is a load or store and VECTYPE
1849 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1850 says how the load or store is going to be implemented and GROUP_SIZE
1851 is the number of load or store statements in the containing group.
bfaa08b7
RS
1852 If the access is a gather load or scatter store, GS_INFO describes
1853 its arguments.
7cfb4d93
RS
1854
1855 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1856 supported, otherwise record the required mask types. */
1857
1858static void
1859check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1860 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1861 vect_memory_access_type memory_access_type,
1862 gather_scatter_info *gs_info)
7cfb4d93
RS
1863{
1864 /* Invariant loads need no special support. */
1865 if (memory_access_type == VMAT_INVARIANT)
1866 return;
1867
1868 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1869 machine_mode vecmode = TYPE_MODE (vectype);
1870 bool is_load = (vls_type == VLS_LOAD);
1871 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1872 {
1873 if (is_load
1874 ? !vect_load_lanes_supported (vectype, group_size, true)
1875 : !vect_store_lanes_supported (vectype, group_size, true))
1876 {
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1879 "can't use a fully-masked loop because the"
1880 " target doesn't have an appropriate masked"
1881 " load/store-lanes instruction.\n");
1882 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1883 return;
1884 }
1885 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1886 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1887 return;
1888 }
1889
bfaa08b7
RS
1890 if (memory_access_type == VMAT_GATHER_SCATTER)
1891 {
f307441a
RS
1892 internal_fn ifn = (is_load
1893 ? IFN_MASK_GATHER_LOAD
1894 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1895 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1896 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1897 gs_info->memory_type,
1898 TYPE_SIGN (offset_type),
1899 gs_info->scale))
1900 {
1901 if (dump_enabled_p ())
1902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1903 "can't use a fully-masked loop because the"
1904 " target doesn't have an appropriate masked"
f307441a 1905 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1906 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1907 return;
1908 }
1909 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1910 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1911 return;
1912 }
1913
7cfb4d93
RS
1914 if (memory_access_type != VMAT_CONTIGUOUS
1915 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1916 {
1917 /* Element X of the data must come from iteration i * VF + X of the
1918 scalar loop. We need more work to support other mappings. */
1919 if (dump_enabled_p ())
1920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1921 "can't use a fully-masked loop because an access"
1922 " isn't contiguous.\n");
1923 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1924 return;
1925 }
1926
1927 machine_mode mask_mode;
1928 if (!(targetm.vectorize.get_mask_mode
1929 (GET_MODE_NUNITS (vecmode),
1930 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1931 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1932 {
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "can't use a fully-masked loop because the target"
1936 " doesn't have the appropriate masked load or"
1937 " store.\n");
1938 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1939 return;
1940 }
1941 /* We might load more scalars than we need for permuting SLP loads.
1942 We checked in get_group_load_store_type that the extra elements
1943 don't leak into a new vector. */
1944 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1945 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1946 unsigned int nvectors;
1947 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1948 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1949 else
1950 gcc_unreachable ();
1951}
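
The final step above records one loop mask per vector of the group, rounding group_size * vf up to a multiple of nunits. A hypothetical fixed-width sketch of that computation (can_div_away_from_zero_p reduces to a ceiling division when the sizes are compile-time constants):

#include <cassert>
#include <cstdio>

static unsigned
nvectors_for_masks (unsigned group_size, unsigned vf, unsigned nunits)
{
  /* Ceiling division: the constant-size analogue of can_div_away_from_zero_p.  */
  return (group_size * vf + nunits - 1) / nunits;
}

int main ()
{
  printf ("%u\n", nvectors_for_masks (3, 8, 4));   /* 24 elements / 4 -> 6 masks.  */
  printf ("%u\n", nvectors_for_masks (3, 4, 8));   /* 12 elements / 8 -> 2 masks.  */
  assert (nvectors_for_masks (1, 16, 16) == 1);
  return 0;
}
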
1952
1953/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1954 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1955 that needs to be applied to all loads and stores in a vectorized loop.
1956 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1957
1958 MASK_TYPE is the type of both masks. If new statements are needed,
1959 insert them before GSI. */
1960
1961static tree
1962prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1963 gimple_stmt_iterator *gsi)
1964{
1965 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1966 if (!loop_mask)
1967 return vec_mask;
1968
1969 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1970 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1971 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1972 vec_mask, loop_mask);
1973 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1974 return and_res;
1975}
1976
429ef523
RS
1977/* Determine whether we can use a gather load or scatter store to vectorize
1978 strided load or store STMT by truncating the current offset to a smaller
1979 width. We need to be able to construct an offset vector:
1980
1981 { 0, X, X*2, X*3, ... }
1982
1983 without loss of precision, where X is STMT's DR_STEP.
1984
1985 Return true if this is possible, describing the gather load or scatter
1986 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1987
1988static bool
1989vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1990 bool masked_p,
1991 gather_scatter_info *gs_info)
1992{
1993 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1994 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1995 tree step = DR_STEP (dr);
1996 if (TREE_CODE (step) != INTEGER_CST)
1997 {
1998 /* ??? Perhaps we could use range information here? */
1999 if (dump_enabled_p ())
2000 dump_printf_loc (MSG_NOTE, vect_location,
2001 "cannot truncate variable step.\n");
2002 return false;
2003 }
2004
2005 /* Get the number of bits in an element. */
2006 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2007 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2008 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2009
2010 /* Set COUNT to the upper limit on the number of elements - 1.
2011 Start with the maximum vectorization factor. */
2012 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2013
2014 /* Try lowering COUNT to the number of scalar latch iterations. */
2015 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2016 widest_int max_iters;
2017 if (max_loop_iterations (loop, &max_iters)
2018 && max_iters < count)
2019 count = max_iters.to_shwi ();
2020
2021 /* Try scales of 1 and the element size. */
2022 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
4a669ac3 2023 wi::overflow_type overflow = wi::OVF_NONE;
429ef523
RS
2024 for (int i = 0; i < 2; ++i)
2025 {
2026 int scale = scales[i];
2027 widest_int factor;
2028 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2029 continue;
2030
2031 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2032 in OFFSET_BITS bits. */
4a669ac3
AH
2033 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2034 if (overflow)
429ef523
RS
2035 continue;
2036 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2037 if (wi::min_precision (range, sign) > element_bits)
2038 {
4a669ac3 2039 overflow = wi::OVF_UNKNOWN;
429ef523
RS
2040 continue;
2041 }
2042
2043 /* See whether the target supports the operation. */
2044 tree memory_type = TREE_TYPE (DR_REF (dr));
2045 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2046 memory_type, element_bits, sign, scale,
2047 &gs_info->ifn, &gs_info->element_type))
2048 continue;
2049
2050 tree offset_type = build_nonstandard_integer_type (element_bits,
2051 sign == UNSIGNED);
2052
2053 gs_info->decl = NULL_TREE;
2054 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2055 but we don't need to store that here. */
2056 gs_info->base = NULL_TREE;
2057 gs_info->offset = fold_convert (offset_type, step);
929b4411 2058 gs_info->offset_dt = vect_constant_def;
429ef523
RS
2059 gs_info->offset_vectype = NULL_TREE;
2060 gs_info->scale = scale;
2061 gs_info->memory_type = memory_type;
2062 return true;
2063 }
2064
4a669ac3 2065 if (overflow && dump_enabled_p ())
429ef523
RS
2066 dump_printf_loc (MSG_NOTE, vect_location,
2067 "truncating gather/scatter offset to %d bits"
2068 " might change its value.\n", element_bits);
2069
2070 return false;
2071}
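
A simplified standalone sketch of the truncation test above: can every offset 0, X, 2X, ..., COUNT*X with X = STEP/SCALE be represented in ELEMENT_BITS bits? Here __int128 stands in for widest_int and the check is reduced to the signed case only.

#include <cstdio>

static bool
offset_fits_p (long long step, long long scale, unsigned long long count,
               unsigned element_bits)
{
  if (step % scale != 0)
    return false;                          /* wi::multiple_of_p would fail.  */
  __int128 range = (__int128) (step / scale) * count;
  __int128 lo = -((__int128) 1 << (element_bits - 1));
  __int128 hi = ((__int128) 1 << (element_bits - 1)) - 1;
  return range >= lo && range <= hi;       /* fits in a signed ELEMENT_BITS value.  */
}

int main ()
{
  printf ("%d\n", (int) offset_fits_p (4, 4, 1000, 16));    /* 1000 fits in 16 bits.  */
  printf ("%d\n", (int) offset_fits_p (4, 1, 100000, 16));  /* 400000 does not.  */
  return 0;
}
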
2072
ab2fc782
RS
2073/* Return true if we can use gather/scatter internal functions to
2074 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
2075 MASKED_P is true if load or store is conditional. When returning
2076 true, fill in GS_INFO with the information required to perform the
2077 operation. */
ab2fc782
RS
2078
2079static bool
2080vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 2081 bool masked_p,
ab2fc782
RS
2082 gather_scatter_info *gs_info)
2083{
2084 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2085 || gs_info->decl)
429ef523
RS
2086 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2087 masked_p, gs_info);
ab2fc782
RS
2088
2089 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2090 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2091 tree offset_type = TREE_TYPE (gs_info->offset);
2092 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2093
2094 /* Enforced by vect_check_gather_scatter. */
2095 gcc_assert (element_bits >= offset_bits);
2096
2097 /* If the elements are wider than the offset, convert the offset to the
2098 same width, without changing its sign. */
2099 if (element_bits > offset_bits)
2100 {
2101 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2102 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2103 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2104 }
2105
2106 if (dump_enabled_p ())
2107 dump_printf_loc (MSG_NOTE, vect_location,
2108 "using gather/scatter for strided/grouped access,"
2109 " scale = %d\n", gs_info->scale);
2110
2111 return true;
2112}
2113
62da9e14
RS
2114/* STMT is a non-strided load or store, meaning that it accesses
2115 elements with a known constant step. Return -1 if that step
2116 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2117
2118static int
2119compare_step_with_zero (gimple *stmt)
2120{
2121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
2122 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2123 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2124 size_zero_node);
62da9e14
RS
2125}
2126
2127/* If the target supports a permute mask that reverses the elements in
2128 a vector of type VECTYPE, return that mask, otherwise return null. */
2129
2130static tree
2131perm_mask_for_reverse (tree vectype)
2132{
928686b1 2133 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2134
d980067b
RS
2135 /* The encoding has a single stepped pattern. */
2136 vec_perm_builder sel (nunits, 1, 3);
928686b1 2137 for (int i = 0; i < 3; ++i)
908a1a16 2138 sel.quick_push (nunits - 1 - i);
62da9e14 2139
e3342de4
RS
2140 vec_perm_indices indices (sel, 1, nunits);
2141 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2142 return NULL_TREE;
e3342de4 2143 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2144}
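
The three elements pushed above encode a single stepped pattern { n-1, n-2, n-3, ... }. A standalone sketch of the fully expanded reversal selector and its effect, using made-up values:

#include <cstdio>
#include <vector>

int main ()
{
  const unsigned nunits = 8;
  std::vector<unsigned> sel (nunits);
  for (unsigned i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;                /* 7, 6, 5, ..., 0.  */

  int v[nunits] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  int r[nunits];
  for (unsigned i = 0; i < nunits; ++i)
    r[i] = v[sel[i]];                       /* VEC_PERM_EXPR-style selection.  */

  for (unsigned i = 0; i < nunits; ++i)
    printf ("%d ", r[i]);                   /* 17 16 15 14 13 12 11 10  */
  printf ("\n");
  return 0;
}
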
5ce9450f 2145
c3a8f964
RS
2146/* STMT is either a masked or unconditional store. Return the value
2147 being stored. */
2148
f307441a 2149tree
c3a8f964
RS
2150vect_get_store_rhs (gimple *stmt)
2151{
2152 if (gassign *assign = dyn_cast <gassign *> (stmt))
2153 {
2154 gcc_assert (gimple_assign_single_p (assign));
2155 return gimple_assign_rhs1 (assign);
2156 }
2157 if (gcall *call = dyn_cast <gcall *> (stmt))
2158 {
2159 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2160 int index = internal_fn_stored_value_index (ifn);
2161 gcc_assert (index >= 0);
2162 return gimple_call_arg (stmt, index);
c3a8f964
RS
2163 }
2164 gcc_unreachable ();
2165}
2166
2de001ee
RS
2167/* A subroutine of get_load_store_type, with a subset of the same
2168 arguments. Handle the case where STMT is part of a grouped load
2169 or store.
2170
2171 For stores, the statements in the group are all consecutive
2172 and there is no gap at the end. For loads, the statements in the
2173 group might not be consecutive; there can be gaps between statements
2174 as well as at the end. */
2175
2176static bool
2177get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2178 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2179 vect_memory_access_type *memory_access_type,
2180 gather_scatter_info *gs_info)
2de001ee
RS
2181{
2182 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2183 vec_info *vinfo = stmt_info->vinfo;
2184 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2185 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
bffb8014
RS
2186 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2187 data_reference *first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
2188 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2189 bool single_element_p = (stmt_info == first_stmt_info
2c53b149 2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
bffb8014 2191 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
928686b1 2192 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2193
2194 /* True if the vectorized statements would access beyond the last
2195 statement in the group. */
2196 bool overrun_p = false;
2197
2198 /* True if we can cope with such overrun by peeling for gaps, so that
2199 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2200 bool can_overrun_p = (!masked_p
2201 && vls_type == VLS_LOAD
2202 && loop_vinfo
2203 && !loop->inner);
2de001ee
RS
2204
2205 /* There can only be a gap at the end of the group if the stride is
2206 known at compile time. */
2207 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2208
2209 /* Stores can't yet have gaps. */
2210 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2211
2212 if (slp)
2213 {
2214 if (STMT_VINFO_STRIDED_P (stmt_info))
2215 {
2c53b149 2216 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2de001ee
RS
2217 separated by the stride, until we have a complete vector.
2218 Fall back to scalar accesses if that isn't possible. */
928686b1 2219 if (multiple_p (nunits, group_size))
2de001ee
RS
2220 *memory_access_type = VMAT_STRIDED_SLP;
2221 else
2222 *memory_access_type = VMAT_ELEMENTWISE;
2223 }
2224 else
2225 {
2226 overrun_p = loop_vinfo && gap != 0;
2227 if (overrun_p && vls_type != VLS_LOAD)
2228 {
2229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2230 "Grouped store with gaps requires"
2231 " non-consecutive accesses\n");
2232 return false;
2233 }
f702e7d4
RS
2234 /* An overrun is fine if the trailing elements are smaller
2235 than the alignment boundary B. Every vector access will
2236 be a multiple of B and so we are guaranteed to access a
2237 non-gap element in the same B-sized block. */
f9ef2c76 2238 if (overrun_p
f702e7d4
RS
2239 && gap < (vect_known_alignment_in_bytes (first_dr)
2240 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2241 overrun_p = false;
2de001ee
RS
2242 if (overrun_p && !can_overrun_p)
2243 {
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "Peeling for outer loop is not supported\n");
2247 return false;
2248 }
2249 *memory_access_type = VMAT_CONTIGUOUS;
2250 }
2251 }
2252 else
2253 {
2254 /* We can always handle this case using elementwise accesses,
2255 but see if something more efficient is available. */
2256 *memory_access_type = VMAT_ELEMENTWISE;
2257
2258 /* If there is a gap at the end of the group then these optimizations
2259 would access excess elements in the last iteration. */
2260 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2261 /* An overrun is fine if the trailing elements are smaller than the
2262 alignment boundary B. Every vector access will be a multiple of B
2263 and so we are guaranteed to access a non-gap element in the
2264 same B-sized block. */
f9ef2c76 2265 if (would_overrun_p
7e11fc7f 2266 && !masked_p
f702e7d4
RS
2267 && gap < (vect_known_alignment_in_bytes (first_dr)
2268 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2269 would_overrun_p = false;
f702e7d4 2270
2de001ee 2271 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2272 && (can_overrun_p || !would_overrun_p)
2273 && compare_step_with_zero (stmt) > 0)
2de001ee 2274 {
6737facb
RS
2275 /* First cope with the degenerate case of a single-element
2276 vector. */
2277 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2278 *memory_access_type = VMAT_CONTIGUOUS;
2279
2280 /* Otherwise try using LOAD/STORE_LANES. */
2281 if (*memory_access_type == VMAT_ELEMENTWISE
2282 && (vls_type == VLS_LOAD
7e11fc7f
RS
2283 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2284 : vect_store_lanes_supported (vectype, group_size,
2285 masked_p)))
2de001ee
RS
2286 {
2287 *memory_access_type = VMAT_LOAD_STORE_LANES;
2288 overrun_p = would_overrun_p;
2289 }
2290
2291 /* If that fails, try using permuting loads. */
2292 if (*memory_access_type == VMAT_ELEMENTWISE
2293 && (vls_type == VLS_LOAD
2294 ? vect_grouped_load_supported (vectype, single_element_p,
2295 group_size)
2296 : vect_grouped_store_supported (vectype, group_size)))
2297 {
2298 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2299 overrun_p = would_overrun_p;
2300 }
2301 }
429ef523
RS
2302
2303 /* As a last resort, try using a gather load or scatter store.
2304
2305 ??? Although the code can handle all group sizes correctly,
2306 it probably isn't a win to use separate strided accesses based
2307 on nearby locations. Or, even if it's a win over scalar code,
2308 it might not be a win over vectorizing at a lower VF, if that
2309 allows us to use contiguous accesses. */
2310 if (*memory_access_type == VMAT_ELEMENTWISE
2311 && single_element_p
2312 && loop_vinfo
2313 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2314 masked_p, gs_info))
2315 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2316 }
2317
bffb8014 2318 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2de001ee
RS
2319 {
2320 /* STMT is the leader of the group. Check the operands of all the
2321 stmts of the group. */
bffb8014
RS
2322 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2323 while (next_stmt_info)
2de001ee 2324 {
bffb8014 2325 tree op = vect_get_store_rhs (next_stmt_info);
2de001ee 2326 enum vect_def_type dt;
894dd753 2327 if (!vect_is_simple_use (op, vinfo, &dt))
2de001ee
RS
2328 {
2329 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2331 "use not simple.\n");
2332 return false;
2333 }
bffb8014 2334 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2de001ee
RS
2335 }
2336 }
2337
2338 if (overrun_p)
2339 {
2340 gcc_assert (can_overrun_p);
2341 if (dump_enabled_p ())
2342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2343 "Data access with gaps requires scalar "
2344 "epilogue loop\n");
2345 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2346 }
2347
2348 return true;
2349}
2350
62da9e14
RS
2351/* A subroutine of get_load_store_type, with a subset of the same
2352 arguments. Handle the case where STMT is a load or store that
2353 accesses consecutive elements with a negative step. */
2354
2355static vect_memory_access_type
2356get_negative_load_store_type (gimple *stmt, tree vectype,
2357 vec_load_store_type vls_type,
2358 unsigned int ncopies)
2359{
2360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2361 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2362 dr_alignment_support alignment_support_scheme;
2363
2364 if (ncopies > 1)
2365 {
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 "multiple types with negative step.\n");
2369 return VMAT_ELEMENTWISE;
2370 }
2371
2372 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2373 if (alignment_support_scheme != dr_aligned
2374 && alignment_support_scheme != dr_unaligned_supported)
2375 {
2376 if (dump_enabled_p ())
2377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2378 "negative step but alignment required.\n");
2379 return VMAT_ELEMENTWISE;
2380 }
2381
2382 if (vls_type == VLS_STORE_INVARIANT)
2383 {
2384 if (dump_enabled_p ())
2385 dump_printf_loc (MSG_NOTE, vect_location,
2386 "negative step with invariant source;"
2387 " no permute needed.\n");
2388 return VMAT_CONTIGUOUS_DOWN;
2389 }
2390
2391 if (!perm_mask_for_reverse (vectype))
2392 {
2393 if (dump_enabled_p ())
2394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2395 "negative step and reversing not supported.\n");
2396 return VMAT_ELEMENTWISE;
2397 }
2398
2399 return VMAT_CONTIGUOUS_REVERSE;
2400}
2401
2de001ee
RS
2402/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2403 if there is a memory access type that the vectorized form can use,
2404 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2405 or scatters, fill in GS_INFO accordingly.
2406
2407 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2408 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2409 VECTYPE is the vector type that the vectorized statements will use.
2410 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2411
2412static bool
7e11fc7f 2413get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2414 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2415 vect_memory_access_type *memory_access_type,
2416 gather_scatter_info *gs_info)
2417{
2418 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2419 vec_info *vinfo = stmt_info->vinfo;
2420 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2421 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2422 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2423 {
2424 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2425 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2426 gcc_unreachable ();
894dd753 2427 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2de001ee
RS
2428 &gs_info->offset_dt,
2429 &gs_info->offset_vectype))
2430 {
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "%s index use not simple.\n",
2434 vls_type == VLS_LOAD ? "gather" : "scatter");
2435 return false;
2436 }
2437 }
2438 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2439 {
7e11fc7f 2440 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2441 memory_access_type, gs_info))
2de001ee
RS
2442 return false;
2443 }
2444 else if (STMT_VINFO_STRIDED_P (stmt_info))
2445 {
2446 gcc_assert (!slp);
ab2fc782 2447 if (loop_vinfo
429ef523
RS
2448 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2449 masked_p, gs_info))
ab2fc782
RS
2450 *memory_access_type = VMAT_GATHER_SCATTER;
2451 else
2452 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2453 }
2454 else
62da9e14
RS
2455 {
2456 int cmp = compare_step_with_zero (stmt);
2457 if (cmp < 0)
2458 *memory_access_type = get_negative_load_store_type
2459 (stmt, vectype, vls_type, ncopies);
2460 else if (cmp == 0)
2461 {
2462 gcc_assert (vls_type == VLS_LOAD);
2463 *memory_access_type = VMAT_INVARIANT;
2464 }
2465 else
2466 *memory_access_type = VMAT_CONTIGUOUS;
2467 }
2de001ee 2468
4d694b27
RS
2469 if ((*memory_access_type == VMAT_ELEMENTWISE
2470 || *memory_access_type == VMAT_STRIDED_SLP)
2471 && !nunits.is_constant ())
2472 {
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "Not using elementwise accesses due to variable "
2476 "vectorization factor.\n");
2477 return false;
2478 }
2479
2de001ee
RS
2480 /* FIXME: At the moment the cost model seems to underestimate the
2481 cost of using elementwise accesses. This check preserves the
2482 traditional behavior until that can be fixed. */
2483 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8 2484 && !STMT_VINFO_STRIDED_P (stmt_info)
bffb8014 2485 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2c53b149
RB
2486 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2487 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2de001ee
RS
2488 {
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2491 "not falling back to elementwise accesses\n");
2492 return false;
2493 }
2494 return true;
2495}
2496
aaeefd88 2497/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2498 conditional load or store STMT. When returning true, store the type
2499 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2500 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2501
2502static bool
929b4411
RS
2503vect_check_load_store_mask (gimple *stmt, tree mask,
2504 vect_def_type *mask_dt_out,
2505 tree *mask_vectype_out)
aaeefd88
RS
2506{
2507 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2508 {
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2511 "mask argument is not a boolean.\n");
2512 return false;
2513 }
2514
2515 if (TREE_CODE (mask) != SSA_NAME)
2516 {
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "mask argument is not an SSA name.\n");
2520 return false;
2521 }
2522
2523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2524 enum vect_def_type mask_dt;
aaeefd88 2525 tree mask_vectype;
894dd753 2526 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
aaeefd88
RS
2527 {
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2530 "mask use not simple.\n");
2531 return false;
2532 }
2533
2534 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2535 if (!mask_vectype)
2536 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2537
2538 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2539 {
2540 if (dump_enabled_p ())
2541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2542 "could not find an appropriate vector mask type.\n");
2543 return false;
2544 }
2545
2546 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2547 TYPE_VECTOR_SUBPARTS (vectype)))
2548 {
2549 if (dump_enabled_p ())
2550 {
2551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2552 "vector mask type ");
2553 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2554 dump_printf (MSG_MISSED_OPTIMIZATION,
2555 " does not match vector data type ");
2556 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2557 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2558 }
2559 return false;
2560 }
2561
929b4411 2562 *mask_dt_out = mask_dt;
aaeefd88
RS
2563 *mask_vectype_out = mask_vectype;
2564 return true;
2565}
2566
3133c3b6
RS
2567/* Return true if stored value RHS is suitable for vectorizing store
2568 statement STMT. When returning true, store the type of the
929b4411
RS
2569 definition in *RHS_DT_OUT, the type of the vectorized store value in
2570 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2571
2572static bool
929b4411
RS
2573vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2574 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2575{
2576 /* In case this is a store from a constant, make sure
2577 native_encode_expr can handle it. */
2578 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2579 {
2580 if (dump_enabled_p ())
2581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2582 "cannot encode constant as a byte sequence.\n");
2583 return false;
2584 }
2585
2586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
929b4411 2587 enum vect_def_type rhs_dt;
3133c3b6 2588 tree rhs_vectype;
894dd753 2589 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
3133c3b6
RS
2590 {
2591 if (dump_enabled_p ())
2592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2593 "use not simple.\n");
2594 return false;
2595 }
2596
2597 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2598 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2599 {
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 "incompatible vector types.\n");
2603 return false;
2604 }
2605
929b4411 2606 *rhs_dt_out = rhs_dt;
3133c3b6 2607 *rhs_vectype_out = rhs_vectype;
929b4411 2608 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2609 *vls_type_out = VLS_STORE_INVARIANT;
2610 else
2611 *vls_type_out = VLS_STORE;
2612 return true;
2613}
2614
bc9587eb
RS
2615/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2616 Note that we support masks with floating-point type, in which case the
2617 floats are interpreted as a bitmask. */
2618
2619static tree
2620vect_build_all_ones_mask (gimple *stmt, tree masktype)
2621{
2622 if (TREE_CODE (masktype) == INTEGER_TYPE)
2623 return build_int_cst (masktype, -1);
2624 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2625 {
2626 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2627 mask = build_vector_from_val (masktype, mask);
2628 return vect_init_vector (stmt, mask, masktype, NULL);
2629 }
2630 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2631 {
2632 REAL_VALUE_TYPE r;
2633 long tmp[6];
2634 for (int j = 0; j < 6; ++j)
2635 tmp[j] = -1;
2636 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2637 tree mask = build_real (TREE_TYPE (masktype), r);
2638 mask = build_vector_from_val (masktype, mask);
2639 return vect_init_vector (stmt, mask, masktype, NULL);
2640 }
2641 gcc_unreachable ();
2642}
2643
2644/* Build an all-zero merge value of type VECTYPE while vectorizing
2645 STMT as a gather load. */
2646
2647static tree
2648vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2649{
2650 tree merge;
2651 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2652 merge = build_int_cst (TREE_TYPE (vectype), 0);
2653 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2654 {
2655 REAL_VALUE_TYPE r;
2656 long tmp[6];
2657 for (int j = 0; j < 6; ++j)
2658 tmp[j] = 0;
2659 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2660 merge = build_real (TREE_TYPE (vectype), r);
2661 }
2662 else
2663 gcc_unreachable ();
2664 merge = build_vector_from_val (vectype, merge);
2665 return vect_init_vector (stmt, merge, vectype, NULL);
2666}
2667
c48d2d35
RS
2668/* Build a gather load call while vectorizing STMT. Insert new instructions
2669 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2670 operation. If the load is conditional, MASK is the unvectorized
929b4411 2671 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2672
2673static void
2674vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195
RS
2675 stmt_vec_info *vec_stmt,
2676 gather_scatter_info *gs_info, tree mask,
2677 vect_def_type mask_dt)
c48d2d35
RS
2678{
2679 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2680 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2681 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2682 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2683 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2684 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2685 edge pe = loop_preheader_edge (loop);
2686 enum { NARROW, NONE, WIDEN } modifier;
2687 poly_uint64 gather_off_nunits
2688 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2689
2690 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2691 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2692 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2693 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2694 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2695 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2696 tree scaletype = TREE_VALUE (arglist);
2697 gcc_checking_assert (types_compatible_p (srctype, rettype)
2698 && (!mask || types_compatible_p (srctype, masktype)));
2699
2700 tree perm_mask = NULL_TREE;
2701 tree mask_perm_mask = NULL_TREE;
2702 if (known_eq (nunits, gather_off_nunits))
2703 modifier = NONE;
2704 else if (known_eq (nunits * 2, gather_off_nunits))
2705 {
2706 modifier = WIDEN;
2707
2708 /* Currently widening gathers and scatters are only supported for
2709 fixed-length vectors. */
2710 int count = gather_off_nunits.to_constant ();
2711 vec_perm_builder sel (count, count, 1);
2712 for (int i = 0; i < count; ++i)
2713 sel.quick_push (i | (count / 2));
2714
2715 vec_perm_indices indices (sel, 1, count);
2716 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2717 indices);
2718 }
2719 else if (known_eq (nunits, gather_off_nunits * 2))
2720 {
2721 modifier = NARROW;
2722
2723 /* Currently narrowing gathers and scatters are only supported for
2724 fixed-length vectors. */
2725 int count = nunits.to_constant ();
2726 vec_perm_builder sel (count, count, 1);
2727 sel.quick_grow (count);
2728 for (int i = 0; i < count; ++i)
2729 sel[i] = i < count / 2 ? i : i + count / 2;
2730 vec_perm_indices indices (sel, 2, count);
2731 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2732
2733 ncopies *= 2;
2734
2735 if (mask)
2736 {
2737 for (int i = 0; i < count; ++i)
2738 sel[i] = i | (count / 2);
2739 indices.new_vector (sel, 2, count);
2740 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2741 }
2742 }
2743 else
2744 gcc_unreachable ();
2745
2746 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2747 vectype);
2748
2749 tree ptr = fold_convert (ptrtype, gs_info->base);
2750 if (!is_gimple_min_invariant (ptr))
2751 {
2752 gimple_seq seq;
2753 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2754 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2755 gcc_assert (!new_bb);
2756 }
2757
2758 tree scale = build_int_cst (scaletype, gs_info->scale);
2759
2760 tree vec_oprnd0 = NULL_TREE;
2761 tree vec_mask = NULL_TREE;
2762 tree src_op = NULL_TREE;
2763 tree mask_op = NULL_TREE;
2764 tree prev_res = NULL_TREE;
2765 stmt_vec_info prev_stmt_info = NULL;
2766
2767 if (!mask)
2768 {
2769 src_op = vect_build_zero_merge_argument (stmt, rettype);
2770 mask_op = vect_build_all_ones_mask (stmt, masktype);
2771 }
2772
2773 for (int j = 0; j < ncopies; ++j)
2774 {
2775 tree op, var;
c48d2d35
RS
2776 if (modifier == WIDEN && (j & 1))
2777 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2778 perm_mask, stmt, gsi);
2779 else if (j == 0)
2780 op = vec_oprnd0
2781 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2782 else
2783 op = vec_oprnd0
2784 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2785
2786 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2787 {
2788 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2789 TYPE_VECTOR_SUBPARTS (idxtype)));
2790 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2791 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
e1bd7296 2792 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
c48d2d35
RS
2793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2794 op = var;
2795 }
2796
2797 if (mask)
2798 {
2799 if (mask_perm_mask && (j & 1))
2800 mask_op = permute_vec_elements (mask_op, mask_op,
2801 mask_perm_mask, stmt, gsi);
2802 else
2803 {
2804 if (j == 0)
2805 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2806 else
929b4411 2807 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2808
2809 mask_op = vec_mask;
2810 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2811 {
2812 gcc_assert
2813 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2814 TYPE_VECTOR_SUBPARTS (masktype)));
2815 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2816 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
e1bd7296
RS
2817 gassign *new_stmt
2818 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
c48d2d35
RS
2819 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2820 mask_op = var;
2821 }
2822 }
2823 src_op = mask_op;
2824 }
2825
e1bd7296
RS
2826 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2827 mask_op, scale);
c48d2d35 2828
e1bd7296 2829 stmt_vec_info new_stmt_info;
c48d2d35
RS
2830 if (!useless_type_conversion_p (vectype, rettype))
2831 {
2832 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2833 TYPE_VECTOR_SUBPARTS (rettype)));
2834 op = vect_get_new_ssa_name (rettype, vect_simple_var);
e1bd7296
RS
2835 gimple_call_set_lhs (new_call, op);
2836 vect_finish_stmt_generation (stmt, new_call, gsi);
c48d2d35
RS
2837 var = make_ssa_name (vec_dest);
2838 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
e1bd7296
RS
2839 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2840 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
c48d2d35
RS
2841 }
2842 else
2843 {
e1bd7296
RS
2844 var = make_ssa_name (vec_dest, new_call);
2845 gimple_call_set_lhs (new_call, var);
2846 new_stmt_info = vect_finish_stmt_generation (stmt, new_call, gsi);
c48d2d35
RS
2847 }
2848
c48d2d35
RS
2849 if (modifier == NARROW)
2850 {
2851 if ((j & 1) == 0)
2852 {
2853 prev_res = var;
2854 continue;
2855 }
2856 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
e1bd7296 2857 new_stmt_info = loop_vinfo->lookup_def (var);
c48d2d35
RS
2858 }
2859
dbe1b846 2860 if (prev_stmt_info == NULL_STMT_VEC_INFO)
e1bd7296 2861 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
c48d2d35 2862 else
e1bd7296
RS
2863 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2864 prev_stmt_info = new_stmt_info;
c48d2d35
RS
2865 }
2866}
2867
bfaa08b7
RS
2868/* Prepare the base and offset in GS_INFO for vectorization.
2869 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2870 to the vectorized offset argument for the first copy of STMT. STMT
2871 is the statement described by GS_INFO and LOOP is the containing loop. */
2872
2873static void
2874vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2875 gather_scatter_info *gs_info,
2876 tree *dataref_ptr, tree *vec_offset)
2877{
2878 gimple_seq stmts = NULL;
2879 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2880 if (stmts != NULL)
2881 {
2882 basic_block new_bb;
2883 edge pe = loop_preheader_edge (loop);
2884 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2885 gcc_assert (!new_bb);
2886 }
2887 tree offset_type = TREE_TYPE (gs_info->offset);
2888 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2889 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2890 offset_vectype);
2891}
2892
ab2fc782
RS
2893/* Prepare to implement a grouped or strided load or store using
2894 the gather load or scatter store operation described by GS_INFO.
2895 STMT is the load or store statement.
2896
2897 Set *DATAREF_BUMP to the amount that should be added to the base
2898 address after each copy of the vectorized statement. Set *VEC_OFFSET
2899 to an invariant offset vector in which element I has the value
2900 I * DR_STEP / SCALE. */
2901
2902static void
2903vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2904 gather_scatter_info *gs_info,
2905 tree *dataref_bump, tree *vec_offset)
2906{
2907 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2908 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2909 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2910 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2911 gimple_seq stmts;
2912
2913 tree bump = size_binop (MULT_EXPR,
2914 fold_convert (sizetype, DR_STEP (dr)),
2915 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2916 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2917 if (stmts)
2918 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2919
2920 /* The offset given in GS_INFO can have pointer type, so use the element
2921 type of the vector instead. */
2922 tree offset_type = TREE_TYPE (gs_info->offset);
2923 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2924 offset_type = TREE_TYPE (offset_vectype);
2925
2926 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2927 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2928 ssize_int (gs_info->scale));
2929 step = fold_convert (offset_type, step);
2930 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2931
2932 /* Create {0, X, X*2, X*3, ...}. */
2933 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2934 build_zero_cst (offset_type), step);
2935 if (stmts)
2936 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2937}
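
A standalone sketch of the two values computed above for a hypothetical strided access with DR_STEP = 12, SCALE = 4 and nunits = 4: the per-copy pointer bump and the invariant offset series { 0, X, 2X, ... } with X = DR_STEP / SCALE.

#include <cstdio>

int main ()
{
  long step = 12, scale = 4;
  unsigned nunits = 4;

  long dataref_bump = step * nunits;        /* pointer advance per copy: 48.  */
  long x = step / scale;                    /* element step X: 3.  */

  printf ("bump = %ld, offsets =", dataref_bump);
  for (unsigned i = 0; i < nunits; ++i)
    printf (" %ld", x * (long) i);          /* { 0, 3, 6, 9 }.  */
  printf ("\n");
  return 0;
}
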
2938
2939/* Return the amount that should be added to a vector pointer to move
2940 to the next or previous copy of AGGR_TYPE. DR is the data reference
2941 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2942 vectorization. */
2943
2944static tree
2945vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2946 vect_memory_access_type memory_access_type)
2947{
2948 if (memory_access_type == VMAT_INVARIANT)
2949 return size_zero_node;
2950
2951 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2952 tree step = vect_dr_behavior (dr)->step;
2953 if (tree_int_cst_sgn (step) == -1)
2954 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2955 return iv_step;
2956}
2957
37b14185
RB
2958/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2959
2960static bool
2961vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 2962 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2
RB
2963 tree vectype_in, enum vect_def_type *dt,
2964 stmt_vector_for_cost *cost_vec)
37b14185
RB
2965{
2966 tree op, vectype;
2967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2968 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2969 unsigned ncopies;
2970 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2971
2972 op = gimple_call_arg (stmt, 0);
2973 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2974
2975 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2976 return false;
37b14185
RB
2977
2978 /* Multiple types in SLP are handled by creating the appropriate number of
2979 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2980 case of SLP. */
2981 if (slp_node)
2982 ncopies = 1;
2983 else
e8f142e2 2984 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2985
2986 gcc_assert (ncopies >= 1);
2987
2988 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2989 if (! char_vectype)
2990 return false;
2991
928686b1
RS
2992 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2993 return false;
2994
794e3180 2995 unsigned word_bytes = num_bytes / nunits;
908a1a16 2996
d980067b
RS
2997 /* The encoding uses one stepped pattern for each byte in the word. */
2998 vec_perm_builder elts (num_bytes, word_bytes, 3);
2999 for (unsigned i = 0; i < 3; ++i)
37b14185 3000 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 3001 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 3002
e3342de4
RS
3003 vec_perm_indices indices (elts, 1, num_bytes);
3004 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
3005 return false;
3006
3007 if (! vec_stmt)
3008 {
3009 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3010 DUMP_VECT_SCOPE ("vectorizable_bswap");
78604de0 3011 if (! slp_node)
37b14185 3012 {
68435eb2
RB
3013 record_stmt_cost (cost_vec,
3014 1, vector_stmt, stmt_info, 0, vect_prologue);
3015 record_stmt_cost (cost_vec,
3016 ncopies, vec_perm, stmt_info, 0, vect_body);
3017 }
3018 return true;
3019 }
3020
736d0f28 3021 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3022
3023 /* Transform. */
3024 vec<tree> vec_oprnds = vNULL;
e1bd7296 3025 stmt_vec_info new_stmt_info = NULL;
3026 stmt_vec_info prev_stmt_info = NULL;
3027 for (unsigned j = 0; j < ncopies; j++)
3028 {
3029 /* Handle uses. */
3030 if (j == 0)
306b0c92 3031 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3032 else
3033 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3034
3035 /* Arguments are ready.  Create the new vector stmt.  */
3036 unsigned i;
3037 tree vop;
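      /* Each vector operand is byte-swapped by viewing it as a vector of
	 chars, applying the BSWAP_VCONST permutation, and viewing the
	 result back in the original vector type.  */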
3038 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3039 {
e1bd7296 3040 gimple *new_stmt;
3041 tree tem = make_ssa_name (char_vectype);
3042 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3043 char_vectype, vop));
3044 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3045 tree tem2 = make_ssa_name (char_vectype);
3046 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3047 tem, tem, bswap_vconst);
3048 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3049 tem = make_ssa_name (vectype);
3050 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3051 vectype, tem2));
e1bd7296 3052 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
37b14185 3053 if (slp_node)
e1bd7296 3054 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3055 }
3056
3057 if (slp_node)
3058 continue;
3059
3060 if (j == 0)
e1bd7296 3061 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
37b14185 3062 else
e1bd7296 3063 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
37b14185 3064
e1bd7296 3065 prev_stmt_info = new_stmt_info;
3066 }
3067
3068 vec_oprnds.release ();
3069 return true;
3070}
3071
3072/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3073 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3074 in a single step. On success, store the binary pack code in
3075 *CONVERT_CODE. */
3076
3077static bool
3078simple_integer_narrowing (tree vectype_out, tree vectype_in,
3079 tree_code *convert_code)
3080{
3081 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3082 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3083 return false;
3084
3085 tree_code code;
3086 int multi_step_cvt = 0;
3087 auto_vec <tree, 8> interm_types;
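  /* Require a single-step narrowing: a nonzero MULTI_STEP_CVT would mean
     intermediate types are needed (e.g. DImode -> SImode -> HImode
     elements), which the callers of this helper do not handle.  */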
3088 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3089 &code, &multi_step_cvt,
3090 &interm_types)
3091 || multi_step_cvt)
3092 return false;
3093
3094 *convert_code = code;
3095 return true;
3096}
5ce9450f 3097
3098/* Function vectorizable_call.
3099
538dd0b7 3100 Check if GS performs a function call that can be vectorized.
b8698a0f 3101 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3102 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3103 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3104
3105static bool
3106vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi,
3107 stmt_vec_info *vec_stmt, slp_tree slp_node,
3108 stmt_vector_for_cost *cost_vec)
ebfd146a 3109{
538dd0b7 3110 gcall *stmt;
3111 tree vec_dest;
3112 tree scalar_dest;
0267732b 3113 tree op;
ebfd146a 3114 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 3115 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 3116 tree vectype_out, vectype_in;
c7bda0f4
RS
3117 poly_uint64 nunits_in;
3118 poly_uint64 nunits_out;
ebfd146a 3119 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 3120 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3121 vec_info *vinfo = stmt_info->vinfo;
81c40241 3122 tree fndecl, new_temp, rhs_type;
2c58d42c
RS
3123 enum vect_def_type dt[4]
3124 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3125 vect_unknown_def_type };
3126 int ndts = ARRAY_SIZE (dt);
ebfd146a 3127 int ncopies, j;
2c58d42c
RS
3128 auto_vec<tree, 8> vargs;
3129 auto_vec<tree, 8> orig_vargs;
ebfd146a
IR
3130 enum { NARROW, NONE, WIDEN } modifier;
3131 size_t i, nargs;
9d5e7640 3132 tree lhs;
ebfd146a 3133
190c2236 3134 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3135 return false;
3136
66c16fd9
RB
3137 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3138 && ! vec_stmt)
ebfd146a
IR
3139 return false;
3140
538dd0b7
DM
3141 /* Is GS a vectorizable call? */
3142 stmt = dyn_cast <gcall *> (gs);
3143 if (!stmt)
ebfd146a
IR
3144 return false;
3145
5ce9450f 3146 if (gimple_call_internal_p (stmt)
bfaa08b7 3147 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3148 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
c3a8f964
RS
3149 /* Handled by vectorizable_load and vectorizable_store. */
3150 return false;
5ce9450f 3151
0136f8f0
AH
3152 if (gimple_call_lhs (stmt) == NULL_TREE
3153 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
3154 return false;
3155
0136f8f0 3156 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3157
b690cc0f
RG
3158 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3159
ebfd146a
IR
3160 /* Process function arguments. */
3161 rhs_type = NULL_TREE;
b690cc0f 3162 vectype_in = NULL_TREE;
ebfd146a
IR
3163 nargs = gimple_call_num_args (stmt);
3164
1b1562a5
MM
3165 /* Bail out if the function has more than four arguments; we do not have
3166 interesting builtin functions to vectorize with more than two arguments
3167 except for fma. No arguments is also not good. */
2c58d42c 3168 if (nargs == 0 || nargs > 4)
ebfd146a
IR
3169 return false;
3170
74bf76ed 3171 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2c58d42c
RS
3172 combined_fn cfn = gimple_call_combined_fn (stmt);
3173 if (cfn == CFN_GOMP_SIMD_LANE)
74bf76ed
JJ
3174 {
3175 nargs = 0;
3176 rhs_type = unsigned_type_node;
3177 }
3178
2c58d42c
RS
3179 int mask_opno = -1;
3180 if (internal_fn_p (cfn))
3181 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3182
ebfd146a
IR
3183 for (i = 0; i < nargs; i++)
3184 {
b690cc0f
RG
3185 tree opvectype;
3186
ebfd146a 3187 op = gimple_call_arg (stmt, i);
2c58d42c
RS
3188 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3189 {
3190 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3192 "use not simple.\n");
3193 return false;
3194 }
3195
3196 /* Skip the mask argument to an internal function. This operand
3197 has been converted via a pattern if necessary. */
3198 if ((int) i == mask_opno)
3199 continue;
ebfd146a
IR
3200
3201 /* We can only handle calls with arguments of the same type. */
3202 if (rhs_type
8533c9d8 3203 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3204 {
73fbfcad 3205 if (dump_enabled_p ())
78c60e3d 3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3207 "argument types differ.\n");
ebfd146a
IR
3208 return false;
3209 }
b690cc0f
RG
3210 if (!rhs_type)
3211 rhs_type = TREE_TYPE (op);
ebfd146a 3212
b690cc0f
RG
3213 if (!vectype_in)
3214 vectype_in = opvectype;
3215 else if (opvectype
3216 && opvectype != vectype_in)
3217 {
73fbfcad 3218 if (dump_enabled_p ())
78c60e3d 3219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3220 "argument vector types differ.\n");
b690cc0f
RG
3221 return false;
3222 }
3223 }
3224 /* If all arguments are external or constant defs use a vector type with
3225 the same size as the output vector type. */
ebfd146a 3226 if (!vectype_in)
b690cc0f 3227 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3228 if (vec_stmt)
3229 gcc_assert (vectype_in);
3230 if (!vectype_in)
3231 {
73fbfcad 3232 if (dump_enabled_p ())
7d8930a0 3233 {
78c60e3d
SS
3234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3235 "no vectype for scalar type ");
3236 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3237 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3238 }
3239
3240 return false;
3241 }
ebfd146a
IR
3242
3243 /* FORNOW */
b690cc0f
RG
3244 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3245 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
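  /* Classify the call by comparing element counts: twice as many output
     elements means the call narrows (two vector operands per result),
     twice as many input elements means it widens.  */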
c7bda0f4 3246 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3247 modifier = NARROW;
c7bda0f4 3248 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3249 modifier = NONE;
c7bda0f4 3250 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3251 modifier = WIDEN;
3252 else
3253 return false;
3254
70439f0d
RS
3255 /* We only handle functions that do not read or clobber memory. */
3256 if (gimple_vuse (stmt))
3257 {
3258 if (dump_enabled_p ())
3259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3260 "function reads from or writes to memory.\n");
3261 return false;
3262 }
3263
ebfd146a
IR
3264 /* For now, we only vectorize functions if a target specific builtin
3265 is available. TODO -- in some cases, it might be profitable to
3266 insert the calls for pieces of the vector, in order to be able
3267 to vectorize other operations in the loop. */
70439f0d
RS
3268 fndecl = NULL_TREE;
3269 internal_fn ifn = IFN_LAST;
70439f0d
RS
3270 tree callee = gimple_call_fndecl (stmt);
3271
3272 /* First try using an internal function. */
b1b6836e
RS
3273 tree_code convert_code = ERROR_MARK;
3274 if (cfn != CFN_LAST
3275 && (modifier == NONE
3276 || (modifier == NARROW
3277 && simple_integer_narrowing (vectype_out, vectype_in,
3278 &convert_code))))
70439f0d
RS
3279 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3280 vectype_in);
3281
3282 /* If that fails, try asking for a target-specific built-in function. */
3283 if (ifn == IFN_LAST)
3284 {
3285 if (cfn != CFN_LAST)
3286 fndecl = targetm.vectorize.builtin_vectorized_function
3287 (cfn, vectype_out, vectype_in);
7672aa9b 3288 else if (callee)
70439f0d
RS
3289 fndecl = targetm.vectorize.builtin_md_vectorized_function
3290 (callee, vectype_out, vectype_in);
3291 }
3292
3293 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3294 {
70439f0d 3295 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3296 && !slp_node
3297 && loop_vinfo
3298 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3299 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3300 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3301 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3302 {
3303 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3304 { 0, 1, 2, ... vf - 1 } vector. */
3305 gcc_assert (nargs == 0);
3306 }
37b14185
RB
3307 else if (modifier == NONE
3308 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3309 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3310 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3311 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
68435eb2 3312 vectype_in, dt, cost_vec);
74bf76ed
JJ
3313 else
3314 {
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3317 "function is not vectorizable.\n");
74bf76ed
JJ
3318 return false;
3319 }
ebfd146a
IR
3320 }
3321
fce57248 3322 if (slp_node)
190c2236 3323 ncopies = 1;
b1b6836e 3324 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3325 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3326 else
e8f142e2 3327 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3328
3329 /* Sanity check: make sure that at least one copy of the vectorized stmt
3330 needs to be generated. */
3331 gcc_assert (ncopies >= 1);
3332
ed623edb 3333 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
ebfd146a
IR
3334 if (!vec_stmt) /* transformation not required. */
3335 {
3336 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
adac3a68 3337 DUMP_VECT_SCOPE ("vectorizable_call");
68435eb2
RB
3338 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
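      /* A single-step narrowing call packs every pair of vector results
	 with one extra statement, so account for NCOPIES / 2
	 vec_promote_demote statements in the loop body.  */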
3339 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3340 record_stmt_cost (cost_vec, ncopies / 2,
3341 vec_promote_demote, stmt_info, 0, vect_body);
b1b6836e 3342
2c58d42c
RS
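      /* A conditional call in a fully-masked loop needs one loop mask per
	 vector statement; record that requirement here so the masks are
	 available when the call is transformed.  */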
3343 if (loop_vinfo && mask_opno >= 0)
3344 {
3345 unsigned int nvectors = (slp_node
3346 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3347 : ncopies);
3348 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3349 }
ebfd146a
IR
3350 return true;
3351 }
3352
67b8dbac 3353 /* Transform. */
ebfd146a 3354
73fbfcad 3355 if (dump_enabled_p ())
e645e942 3356 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3357
3358 /* Handle def. */
3359 scalar_dest = gimple_call_lhs (stmt);
3360 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3361
2c58d42c
RS
3362 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3363
e1bd7296 3364 stmt_vec_info new_stmt_info = NULL;
ebfd146a 3365 prev_stmt_info = NULL;
b1b6836e 3366 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3367 {
b1b6836e 3368 tree prev_res = NULL_TREE;
2c58d42c
RS
3369 vargs.safe_grow (nargs);
3370 orig_vargs.safe_grow (nargs);
ebfd146a
IR
3371 for (j = 0; j < ncopies; ++j)
3372 {
3373 /* Build argument list for the vectorized call. */
190c2236
JJ
3374 if (slp_node)
3375 {
ef062b13 3376 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3377 vec<tree> vec_oprnds0;
190c2236
JJ
3378
3379 for (i = 0; i < nargs; i++)
2c58d42c 3380 vargs[i] = gimple_call_arg (stmt, i);
306b0c92 3381 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3382 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3383
3384 /* Arguments are ready. Create the new vector stmt. */
9771b263 3385 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3386 {
3387 size_t k;
3388 for (k = 0; k < nargs; k++)
3389 {
37b5ec8f 3390 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3391 vargs[k] = vec_oprndsk[i];
190c2236 3392 }
b1b6836e
RS
3393 if (modifier == NARROW)
3394 {
2c58d42c
RS
3395 /* We don't define any narrowing conditional functions
3396 at present. */
3397 gcc_assert (mask_opno < 0);
b1b6836e 3398 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3399 gcall *call
3400 = gimple_build_call_internal_vec (ifn, vargs);
3401 gimple_call_set_lhs (call, half_res);
3402 gimple_call_set_nothrow (call, true);
e1bd7296
RS
3403 new_stmt_info
3404 = vect_finish_stmt_generation (stmt, call, gsi);
b1b6836e
RS
3405 if ((i & 1) == 0)
3406 {
3407 prev_res = half_res;
3408 continue;
3409 }
3410 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
3411 gimple *new_stmt
3412 = gimple_build_assign (new_temp, convert_code,
3413 prev_res, half_res);
3414 new_stmt_info
3415 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
b1b6836e 3416 }
70439f0d 3417 else
b1b6836e 3418 {
2c58d42c
RS
3419 if (mask_opno >= 0 && masked_loop_p)
3420 {
3421 unsigned int vec_num = vec_oprnds0.length ();
3422 /* Always true for SLP. */
3423 gcc_assert (ncopies == 1);
3424 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3425 vectype_out, i);
3426 vargs[mask_opno] = prepare_load_store_mask
3427 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3428 }
3429
a844293d 3430 gcall *call;
b1b6836e 3431 if (ifn != IFN_LAST)
a844293d 3432 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3433 else
a844293d
RS
3434 call = gimple_build_call_vec (fndecl, vargs);
3435 new_temp = make_ssa_name (vec_dest, call);
3436 gimple_call_set_lhs (call, new_temp);
3437 gimple_call_set_nothrow (call, true);
e1bd7296
RS
3438 new_stmt_info
3439 = vect_finish_stmt_generation (stmt, call, gsi);
b1b6836e 3440 }
e1bd7296 3441 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
190c2236
JJ
3442 }
3443
3444 for (i = 0; i < nargs; i++)
3445 {
37b5ec8f 3446 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3447 vec_oprndsi.release ();
190c2236 3448 }
190c2236
JJ
3449 continue;
3450 }
3451
ebfd146a
IR
3452 for (i = 0; i < nargs; i++)
3453 {
3454 op = gimple_call_arg (stmt, i);
3455 if (j == 0)
3456 vec_oprnd0
81c40241 3457 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3458 else
2c58d42c
RS
3459 vec_oprnd0
3460 = vect_get_vec_def_for_stmt_copy (dt[i], orig_vargs[i]);
3461
3462 orig_vargs[i] = vargs[i] = vec_oprnd0;
3463 }
ebfd146a 3464
2c58d42c
RS
3465 if (mask_opno >= 0 && masked_loop_p)
3466 {
3467 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3468 vectype_out, j);
3469 vargs[mask_opno]
3470 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3471 vargs[mask_opno], gsi);
ebfd146a
IR
3472 }
3473
2c58d42c 3474 if (cfn == CFN_GOMP_SIMD_LANE)
74bf76ed 3475 {
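	      /* Copy J of the simd lane "call" is simply the constant
		 index vector { J * NUNITS_OUT, J * NUNITS_OUT + 1, ... }.  */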
c7bda0f4 3476 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3477 tree new_var
0e22bb5a 3478 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3479 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3480 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3481 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
3482 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3483 new_stmt_info
3484 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
74bf76ed 3485 }
b1b6836e
RS
3486 else if (modifier == NARROW)
3487 {
2c58d42c
RS
3488 /* We don't define any narrowing conditional functions at
3489 present. */
3490 gcc_assert (mask_opno < 0);
b1b6836e 3491 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3492 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3493 gimple_call_set_lhs (call, half_res);
3494 gimple_call_set_nothrow (call, true);
e1bd7296 3495 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
b1b6836e
RS
3496 if ((j & 1) == 0)
3497 {
3498 prev_res = half_res;
3499 continue;
3500 }
3501 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
3502 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3503 prev_res, half_res);
3504 new_stmt_info
3505 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
b1b6836e 3506 }
74bf76ed
JJ
3507 else
3508 {
a844293d 3509 gcall *call;
70439f0d 3510 if (ifn != IFN_LAST)
a844293d 3511 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3512 else
a844293d 3513 call = gimple_build_call_vec (fndecl, vargs);
e1bd7296 3514 new_temp = make_ssa_name (vec_dest, call);
a844293d
RS
3515 gimple_call_set_lhs (call, new_temp);
3516 gimple_call_set_nothrow (call, true);
e1bd7296 3517 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
74bf76ed 3518 }
ebfd146a 3519
b1b6836e 3520 if (j == (modifier == NARROW ? 1 : 0))
e1bd7296 3521 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
ebfd146a 3522 else
e1bd7296 3523 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
ebfd146a 3524
e1bd7296 3525 prev_stmt_info = new_stmt_info;
ebfd146a 3526 }
b1b6836e
RS
3527 }
3528 else if (modifier == NARROW)
3529 {
2c58d42c
RS
3530 /* We don't define any narrowing conditional functions at present. */
3531 gcc_assert (mask_opno < 0);
ebfd146a
IR
3532 for (j = 0; j < ncopies; ++j)
3533 {
3534 /* Build argument list for the vectorized call. */
3535 if (j == 0)
9771b263 3536 vargs.create (nargs * 2);
ebfd146a 3537 else
9771b263 3538 vargs.truncate (0);
ebfd146a 3539
190c2236
JJ
3540 if (slp_node)
3541 {
ef062b13 3542 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3543 vec<tree> vec_oprnds0;
190c2236
JJ
3544
3545 for (i = 0; i < nargs; i++)
9771b263 3546 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3547 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3548 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3549
3550 /* Arguments are ready. Create the new vector stmt. */
9771b263 3551 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3552 {
3553 size_t k;
9771b263 3554 vargs.truncate (0);
190c2236
JJ
3555 for (k = 0; k < nargs; k++)
3556 {
37b5ec8f 3557 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3558 vargs.quick_push (vec_oprndsk[i]);
3559 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3560 }
a844293d 3561 gcall *call;
70439f0d 3562 if (ifn != IFN_LAST)
a844293d 3563 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3564 else
a844293d
RS
3565 call = gimple_build_call_vec (fndecl, vargs);
3566 new_temp = make_ssa_name (vec_dest, call);
3567 gimple_call_set_lhs (call, new_temp);
3568 gimple_call_set_nothrow (call, true);
e1bd7296
RS
3569 new_stmt_info
3570 = vect_finish_stmt_generation (stmt, call, gsi);
3571 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
190c2236
JJ
3572 }
3573
3574 for (i = 0; i < nargs; i++)
3575 {
37b5ec8f 3576 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3577 vec_oprndsi.release ();
190c2236 3578 }
190c2236
JJ
3579 continue;
3580 }
3581
ebfd146a
IR
3582 for (i = 0; i < nargs; i++)
3583 {
3584 op = gimple_call_arg (stmt, i);
3585 if (j == 0)
3586 {
3587 vec_oprnd0
81c40241 3588 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3589 vec_oprnd1
63827fb8 3590 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3591 }
3592 else
3593 {
e1bd7296
RS
3594 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3595 2 * i + 1);
ebfd146a 3596 vec_oprnd0
63827fb8 3597 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3598 vec_oprnd1
63827fb8 3599 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3600 }
3601
9771b263
DN
3602 vargs.quick_push (vec_oprnd0);
3603 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3604 }
3605
e1bd7296 3606 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3607 new_temp = make_ssa_name (vec_dest, new_stmt);
3608 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296 3609 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
3610
3611 if (j == 0)
e1bd7296 3612 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
ebfd146a 3613 else
e1bd7296 3614 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
ebfd146a 3615
e1bd7296 3616 prev_stmt_info = new_stmt_info;
ebfd146a
IR
3617 }
3618
3619 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3620 }
b1b6836e
RS
3621 else
3622 /* No current target implements this case. */
3623 return false;
ebfd146a 3624
9771b263 3625 vargs.release ();
ebfd146a 3626
ebfd146a
IR
3627 /* The call in STMT might prevent it from being removed in dce.
3628 We however cannot remove it here, due to the way the ssa name
3629 it defines is mapped to the new definition. So just replace
3630 rhs of the statement with something harmless. */
3631
dd34c087
JJ
3632 if (slp_node)
3633 return true;
3634
9d5e7640 3635 if (is_pattern_stmt_p (stmt_info))
10681ce8 3636 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
ed7b8123 3637 lhs = gimple_get_lhs (stmt_info->stmt);
3cc2fa2a 3638
e1bd7296
RS
3639 gassign *new_stmt
3640 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
ebfd146a 3641 set_vinfo_for_stmt (new_stmt, stmt_info);
ed7b8123 3642 set_vinfo_for_stmt (stmt_info->stmt, NULL);
ebfd146a
IR
3643 STMT_VINFO_STMT (stmt_info) = new_stmt;
3644 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3645
3646 return true;
3647}
3648
3649
0136f8f0
AH
3650struct simd_call_arg_info
3651{
3652 tree vectype;
3653 tree op;
0136f8f0 3654 HOST_WIDE_INT linear_step;
34e82342 3655 enum vect_def_type dt;
0136f8f0 3656 unsigned int align;
17b658af 3657 bool simd_lane_linear;
0136f8f0
AH
3658};
3659
17b658af
JJ
3660/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3661 is linear within simd lane (but not within whole loop), note it in
3662 *ARGINFO. */
3663
3664static void
3665vect_simd_lane_linear (tree op, struct loop *loop,
3666 struct simd_call_arg_info *arginfo)
3667{
355fe088 3668 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3669
3670 if (!is_gimple_assign (def_stmt)
3671 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3672 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3673 return;
3674
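  /* Walk the offset definition backwards, peeling off constant PLUS
     terms, a single constant MULT factor and integral conversions, until
     the IFN_GOMP_SIMD_LANE call is reached; the address is then
     BASE + lane * LINEAR_STEP within each SIMD chunk.  */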
3675 tree base = gimple_assign_rhs1 (def_stmt);
3676 HOST_WIDE_INT linear_step = 0;
3677 tree v = gimple_assign_rhs2 (def_stmt);
3678 while (TREE_CODE (v) == SSA_NAME)
3679 {
3680 tree t;
3681 def_stmt = SSA_NAME_DEF_STMT (v);
3682 if (is_gimple_assign (def_stmt))
3683 switch (gimple_assign_rhs_code (def_stmt))
3684 {
3685 case PLUS_EXPR:
3686 t = gimple_assign_rhs2 (def_stmt);
3687 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3688 return;
3689 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3690 v = gimple_assign_rhs1 (def_stmt);
3691 continue;
3692 case MULT_EXPR:
3693 t = gimple_assign_rhs2 (def_stmt);
3694 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3695 return;
3696 linear_step = tree_to_shwi (t);
3697 v = gimple_assign_rhs1 (def_stmt);
3698 continue;
3699 CASE_CONVERT:
3700 t = gimple_assign_rhs1 (def_stmt);
3701 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3702 || (TYPE_PRECISION (TREE_TYPE (v))
3703 < TYPE_PRECISION (TREE_TYPE (t))))
3704 return;
3705 if (!linear_step)
3706 linear_step = 1;
3707 v = t;
3708 continue;
3709 default:
3710 return;
3711 }
8e4284d0 3712 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3713 && loop->simduid
3714 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3715 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3716 == loop->simduid))
3717 {
3718 if (!linear_step)
3719 linear_step = 1;
3720 arginfo->linear_step = linear_step;
3721 arginfo->op = base;
3722 arginfo->simd_lane_linear = true;
3723 return;
3724 }
3725 }
3726}
3727
cf1b2ba4
RS
3728/* Return the number of elements in vector type VECTYPE, which is associated
3729 with a SIMD clone. At present these vectors always have a constant
3730 length. */
3731
3732static unsigned HOST_WIDE_INT
3733simd_clone_subparts (tree vectype)
3734{
928686b1 3735 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3736}
3737
0136f8f0
AH
3738/* Function vectorizable_simd_clone_call.
3739
3740 Check if STMT performs a function call that can be vectorized
3741 by calling a simd clone of the function.
3742 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3743 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3744 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3745
3746static bool
355fe088 3747vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 3748 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 3749 stmt_vector_for_cost *)
0136f8f0
AH
3750{
3751 tree vec_dest;
3752 tree scalar_dest;
3753 tree op, type;
3754 tree vec_oprnd0 = NULL_TREE;
3755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3756 tree vectype;
3757 unsigned int nunits;
3758 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3759 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3760 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3761 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3762 tree fndecl, new_temp;
0136f8f0 3763 int ncopies, j;
00426f9a 3764 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3765 vec<tree> vargs = vNULL;
3766 size_t i, nargs;
3767 tree lhs, rtype, ratype;
e7a74006 3768 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3769
3770 /* Is STMT a vectorizable call? */
3771 if (!is_gimple_call (stmt))
3772 return false;
3773
3774 fndecl = gimple_call_fndecl (stmt);
3775 if (fndecl == NULL_TREE)
3776 return false;
3777
d52f5295 3778 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3779 if (node == NULL || node->simd_clones == NULL)
3780 return false;
3781
3782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3783 return false;
3784
66c16fd9
RB
3785 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3786 && ! vec_stmt)
0136f8f0
AH
3787 return false;
3788
3789 if (gimple_call_lhs (stmt)
3790 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3791 return false;
3792
3793 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3794
3795 vectype = STMT_VINFO_VECTYPE (stmt_info);
3796
3797 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3798 return false;
3799
3800 /* FORNOW */
fce57248 3801 if (slp_node)
0136f8f0
AH
3802 return false;
3803
3804 /* Process function arguments. */
3805 nargs = gimple_call_num_args (stmt);
3806
3807 /* Bail out if the function has zero arguments. */
3808 if (nargs == 0)
3809 return false;
3810
00426f9a 3811 arginfo.reserve (nargs, true);
0136f8f0
AH
3812
3813 for (i = 0; i < nargs; i++)
3814 {
3815 simd_call_arg_info thisarginfo;
3816 affine_iv iv;
3817
3818 thisarginfo.linear_step = 0;
3819 thisarginfo.align = 0;
3820 thisarginfo.op = NULL_TREE;
17b658af 3821 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3822
3823 op = gimple_call_arg (stmt, i);
894dd753 3824 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
81c40241 3825 &thisarginfo.vectype)
0136f8f0
AH
3826 || thisarginfo.dt == vect_uninitialized_def)
3827 {
3828 if (dump_enabled_p ())
3829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3830 "use not simple.\n");
0136f8f0
AH
3831 return false;
3832 }
3833
3834 if (thisarginfo.dt == vect_constant_def
3835 || thisarginfo.dt == vect_external_def)
3836 gcc_assert (thisarginfo.vectype == NULL_TREE);
3837 else
3838 gcc_assert (thisarginfo.vectype != NULL_TREE);
3839
6c9e85fb
JJ
3840 /* For linear arguments, the analyze phase should have saved
3841 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3842 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3843 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3844 {
3845 gcc_assert (vec_stmt);
3846 thisarginfo.linear_step
17b658af 3847 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3848 thisarginfo.op
17b658af
JJ
3849 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3850 thisarginfo.simd_lane_linear
3851 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3852 == boolean_true_node);
6c9e85fb
JJ
3853 /* If loop has been peeled for alignment, we need to adjust it. */
3854 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3855 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3856 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3857 {
3858 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3859 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3860 tree opt = TREE_TYPE (thisarginfo.op);
3861 bias = fold_convert (TREE_TYPE (step), bias);
3862 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3863 thisarginfo.op
3864 = fold_build2 (POINTER_TYPE_P (opt)
3865 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3866 thisarginfo.op, bias);
3867 }
3868 }
3869 else if (!vec_stmt
3870 && thisarginfo.dt != vect_constant_def
3871 && thisarginfo.dt != vect_external_def
3872 && loop_vinfo
3873 && TREE_CODE (op) == SSA_NAME
3874 && simple_iv (loop, loop_containing_stmt (stmt), op,
3875 &iv, false)
3876 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3877 {
3878 thisarginfo.linear_step = tree_to_shwi (iv.step);
3879 thisarginfo.op = iv.base;
3880 }
3881 else if ((thisarginfo.dt == vect_constant_def
3882 || thisarginfo.dt == vect_external_def)
3883 && POINTER_TYPE_P (TREE_TYPE (op)))
3884 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3885 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3886 linear too. */
3887 if (POINTER_TYPE_P (TREE_TYPE (op))
3888 && !thisarginfo.linear_step
3889 && !vec_stmt
3890 && thisarginfo.dt != vect_constant_def
3891 && thisarginfo.dt != vect_external_def
3892 && loop_vinfo
3893 && !slp_node
3894 && TREE_CODE (op) == SSA_NAME)
3895 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3896
3897 arginfo.quick_push (thisarginfo);
3898 }
3899
d9f21f6a
RS
3900 unsigned HOST_WIDE_INT vf;
3901 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3902 {
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3905 "not considering SIMD clones; not yet supported"
3906 " for variable-width vectors.\n");
3907       return false;
3908 }
3909
0136f8f0
AH
3910 unsigned int badness = 0;
3911 struct cgraph_node *bestn = NULL;
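  /* Either reuse the clone recorded during the analysis phase, or score
     every available clone: a simdlen smaller than the vectorization
     factor (which would need several calls), an inbranch clone and any
     target-reported badness all add penalties, and the clone with the
     lowest score is chosen.  */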
6c9e85fb
JJ
3912 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3913 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3914 else
3915 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3916 n = n->simdclone->next_clone)
3917 {
3918 unsigned int this_badness = 0;
d9f21f6a 3919 if (n->simdclone->simdlen > vf
0136f8f0
AH
3920 || n->simdclone->nargs != nargs)
3921 continue;
d9f21f6a
RS
3922 if (n->simdclone->simdlen < vf)
3923 this_badness += (exact_log2 (vf)
0136f8f0
AH
3924 - exact_log2 (n->simdclone->simdlen)) * 1024;
3925 if (n->simdclone->inbranch)
3926 this_badness += 2048;
3927 int target_badness = targetm.simd_clone.usable (n);
3928 if (target_badness < 0)
3929 continue;
3930 this_badness += target_badness * 512;
3931 /* FORNOW: Have to add code to add the mask argument. */
3932 if (n->simdclone->inbranch)
3933 continue;
3934 for (i = 0; i < nargs; i++)
3935 {
3936 switch (n->simdclone->args[i].arg_type)
3937 {
3938 case SIMD_CLONE_ARG_TYPE_VECTOR:
3939 if (!useless_type_conversion_p
3940 (n->simdclone->args[i].orig_type,
3941 TREE_TYPE (gimple_call_arg (stmt, i))))
3942 i = -1;
3943 else if (arginfo[i].dt == vect_constant_def
3944 || arginfo[i].dt == vect_external_def
3945 || arginfo[i].linear_step)
3946 this_badness += 64;
3947 break;
3948 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3949 if (arginfo[i].dt != vect_constant_def
3950 && arginfo[i].dt != vect_external_def)
3951 i = -1;
3952 break;
3953 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3954 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3955 if (arginfo[i].dt == vect_constant_def
3956 || arginfo[i].dt == vect_external_def
3957 || (arginfo[i].linear_step
3958 != n->simdclone->args[i].linear_step))
3959 i = -1;
3960 break;
3961 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3962 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3963 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3964 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3965 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3966 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3967 /* FORNOW */
3968 i = -1;
3969 break;
3970 case SIMD_CLONE_ARG_TYPE_MASK:
3971 gcc_unreachable ();
3972 }
3973 if (i == (size_t) -1)
3974 break;
3975 if (n->simdclone->args[i].alignment > arginfo[i].align)
3976 {
3977 i = -1;
3978 break;
3979 }
3980 if (arginfo[i].align)
3981 this_badness += (exact_log2 (arginfo[i].align)
3982 - exact_log2 (n->simdclone->args[i].alignment));
3983 }
3984 if (i == (size_t) -1)
3985 continue;
3986 if (bestn == NULL || this_badness < badness)
3987 {
3988 bestn = n;
3989 badness = this_badness;
3990 }
3991 }
3992
3993 if (bestn == NULL)
00426f9a 3994 return false;
0136f8f0
AH
3995
3996 for (i = 0; i < nargs; i++)
3997 if ((arginfo[i].dt == vect_constant_def
3998 || arginfo[i].dt == vect_external_def)
3999 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4000 {
4001 arginfo[i].vectype
4002 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4003 i)));
4004 if (arginfo[i].vectype == NULL
cf1b2ba4 4005 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 4006 > bestn->simdclone->simdlen))
00426f9a 4007 return false;
0136f8f0
AH
4008 }
4009
4010 fndecl = bestn->decl;
4011 nunits = bestn->simdclone->simdlen;
d9f21f6a 4012 ncopies = vf / nunits;
0136f8f0
AH
4013
4014 /* If the function isn't const, only allow it in simd loops where user
4015 has asserted that at least nunits consecutive iterations can be
4016 performed using SIMD instructions. */
4017 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4018 && gimple_vuse (stmt))
00426f9a 4019 return false;
0136f8f0
AH
4020
4021 /* Sanity check: make sure that at least one copy of the vectorized stmt
4022 needs to be generated. */
4023 gcc_assert (ncopies >= 1);
4024
4025 if (!vec_stmt) /* transformation not required. */
4026 {
6c9e85fb
JJ
4027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4028 for (i = 0; i < nargs; i++)
7adb26f2
JJ
4029 if ((bestn->simdclone->args[i].arg_type
4030 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4031 || (bestn->simdclone->args[i].arg_type
4032 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 4033 {
17b658af 4034 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
4035 + 1);
4036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4037 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4038 ? size_type_node : TREE_TYPE (arginfo[i].op);
4039 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4040 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
4041 tree sll = arginfo[i].simd_lane_linear
4042 ? boolean_true_node : boolean_false_node;
4043 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 4044 }
0136f8f0 4045 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
adac3a68 4046 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
68435eb2 4047/* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
0136f8f0
AH
4048 return true;
4049 }
4050
67b8dbac 4051 /* Transform. */
0136f8f0
AH
4052
4053 if (dump_enabled_p ())
4054 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4055
4056 /* Handle def. */
4057 scalar_dest = gimple_call_lhs (stmt);
4058 vec_dest = NULL_TREE;
4059 rtype = NULL_TREE;
4060 ratype = NULL_TREE;
4061 if (scalar_dest)
4062 {
4063 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4064 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4065 if (TREE_CODE (rtype) == ARRAY_TYPE)
4066 {
4067 ratype = rtype;
4068 rtype = TREE_TYPE (ratype);
4069 }
4070 }
4071
4072 prev_stmt_info = NULL;
4073 for (j = 0; j < ncopies; ++j)
4074 {
4075 /* Build argument list for the vectorized call. */
4076 if (j == 0)
4077 vargs.create (nargs);
4078 else
4079 vargs.truncate (0);
4080
4081 for (i = 0; i < nargs; i++)
4082 {
4083 unsigned int k, l, m, o;
4084 tree atype;
4085 op = gimple_call_arg (stmt, i);
4086 switch (bestn->simdclone->args[i].arg_type)
4087 {
4088 case SIMD_CLONE_ARG_TYPE_VECTOR:
4089 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 4090 o = nunits / simd_clone_subparts (atype);
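	    /* O vectors of the clone's argument type are passed for this
	       argument in each call copy; each one is either carved out of
	       a wider caller-side vector with BIT_FIELD_REF or assembled
	       from narrower ones with a CONSTRUCTOR below.  */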
0136f8f0
AH
4091 for (m = j * o; m < (j + 1) * o; m++)
4092 {
cf1b2ba4
RS
4093 if (simd_clone_subparts (atype)
4094 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 4095 {
73a699ae 4096 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
4097 k = (simd_clone_subparts (arginfo[i].vectype)
4098 / simd_clone_subparts (atype));
0136f8f0
AH
4099 gcc_assert ((k & (k - 1)) == 0);
4100 if (m == 0)
4101 vec_oprnd0
81c40241 4102 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4103 else
4104 {
4105 vec_oprnd0 = arginfo[i].op;
4106 if ((m & (k - 1)) == 0)
4107 vec_oprnd0
4108 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4109 vec_oprnd0);
4110 }
4111 arginfo[i].op = vec_oprnd0;
4112 vec_oprnd0
4113 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 4114 bitsize_int (prec),
0136f8f0 4115 bitsize_int ((m & (k - 1)) * prec));
e1bd7296 4116 gassign *new_stmt
b731b390 4117 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4118 vec_oprnd0);
4119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4120 vargs.safe_push (gimple_assign_lhs (new_stmt));
4121 }
4122 else
4123 {
cf1b2ba4
RS
4124 k = (simd_clone_subparts (atype)
4125 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
4126 gcc_assert ((k & (k - 1)) == 0);
4127 vec<constructor_elt, va_gc> *ctor_elts;
4128 if (k != 1)
4129 vec_alloc (ctor_elts, k);
4130 else
4131 ctor_elts = NULL;
4132 for (l = 0; l < k; l++)
4133 {
4134 if (m == 0 && l == 0)
4135 vec_oprnd0
81c40241 4136 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
4137 else
4138 vec_oprnd0
4139 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4140 arginfo[i].op);
4141 arginfo[i].op = vec_oprnd0;
4142 if (k == 1)
4143 break;
4144 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4145 vec_oprnd0);
4146 }
4147 if (k == 1)
4148 vargs.safe_push (vec_oprnd0);
4149 else
4150 {
4151 vec_oprnd0 = build_constructor (atype, ctor_elts);
e1bd7296 4152 gassign *new_stmt
b731b390 4153 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
4154 vec_oprnd0);
4155 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4156 vargs.safe_push (gimple_assign_lhs (new_stmt));
4157 }
4158 }
4159 }
4160 break;
4161 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4162 vargs.safe_push (op);
4163 break;
4164 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4165 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4166 if (j == 0)
4167 {
4168 gimple_seq stmts;
4169 arginfo[i].op
4170 = force_gimple_operand (arginfo[i].op, &stmts, true,
4171 NULL_TREE);
4172 if (stmts != NULL)
4173 {
4174 basic_block new_bb;
4175 edge pe = loop_preheader_edge (loop);
4176 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4177 gcc_assert (!new_bb);
4178 }
17b658af
JJ
4179 if (arginfo[i].simd_lane_linear)
4180 {
4181 vargs.safe_push (arginfo[i].op);
4182 break;
4183 }
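		  /* Otherwise materialize the linear argument as an IV in
		     the vector loop: a PHI starting at the base value and
		     bumped by LINEAR_STEP * NCOPIES * NUNITS per iteration;
		     copies with J > 0 (handled in the else arm below) get
		     the saved value plus LINEAR_STEP * J * NUNITS.  */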
b731b390 4184 tree phi_res = copy_ssa_name (op);
538dd0b7 4185 gphi *new_phi = create_phi_node (phi_res, loop->header);
4fbeb363 4186 loop_vinfo->add_stmt (new_phi);
0136f8f0
AH
4187 add_phi_arg (new_phi, arginfo[i].op,
4188 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4189 enum tree_code code
4190 = POINTER_TYPE_P (TREE_TYPE (op))
4191 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4192 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4193 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4194 widest_int cst
4195 = wi::mul (bestn->simdclone->args[i].linear_step,
4196 ncopies * nunits);
4197 tree tcst = wide_int_to_tree (type, cst);
b731b390 4198 tree phi_arg = copy_ssa_name (op);
e1bd7296 4199 gassign *new_stmt
0d0e4a03 4200 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4201 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4202 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4fbeb363 4203 loop_vinfo->add_stmt (new_stmt);
0136f8f0
AH
4204 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4205 UNKNOWN_LOCATION);
4206 arginfo[i].op = phi_res;
4207 vargs.safe_push (phi_res);
4208 }
4209 else
4210 {
4211 enum tree_code code
4212 = POINTER_TYPE_P (TREE_TYPE (op))
4213 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4214 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4215 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4216 widest_int cst
4217 = wi::mul (bestn->simdclone->args[i].linear_step,
4218 j * nunits);
4219 tree tcst = wide_int_to_tree (type, cst);
b731b390 4220 new_temp = make_ssa_name (TREE_TYPE (op));
e1bd7296
RS
4221 gassign *new_stmt
4222 = gimple_build_assign (new_temp, code,
4223 arginfo[i].op, tcst);
0136f8f0
AH
4224 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4225 vargs.safe_push (new_temp);
4226 }
4227 break;
7adb26f2
JJ
4228 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4229 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4230 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4231 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4232 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4233 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4234 default:
4235 gcc_unreachable ();
4236 }
4237 }
4238
e1bd7296 4239 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
0136f8f0
AH
4240 if (vec_dest)
4241 {
cf1b2ba4 4242 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4243 if (ratype)
b731b390 4244 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4245 else if (simd_clone_subparts (vectype)
4246 == simd_clone_subparts (rtype))
e1bd7296 4247 new_temp = make_ssa_name (vec_dest, new_call);
0136f8f0 4248 else
e1bd7296
RS
4249 new_temp = make_ssa_name (rtype, new_call);
4250 gimple_call_set_lhs (new_call, new_temp);
0136f8f0 4251 }
e1bd7296
RS
4252 stmt_vec_info new_stmt_info
4253 = vect_finish_stmt_generation (stmt, new_call, gsi);
0136f8f0
AH
4254
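      /* Bring the clone's return value back into the caller's vector type:
	 a wider return vector (or an array-of-vectors return) is split up
	 with BIT_FIELD_REF or MEM_REFs on the array, while a narrower one
	 is accumulated over K consecutive calls into a CONSTRUCTOR.  */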
4255 if (vec_dest)
4256 {
cf1b2ba4 4257 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4258 {
4259 unsigned int k, l;
73a699ae
RS
4260 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4261 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4262 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4263 gcc_assert ((k & (k - 1)) == 0);
4264 for (l = 0; l < k; l++)
4265 {
4266 tree t;
4267 if (ratype)
4268 {
4269 t = build_fold_addr_expr (new_temp);
4270 t = build2 (MEM_REF, vectype, t,
73a699ae 4271 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4272 }
4273 else
4274 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4275 bitsize_int (prec), bitsize_int (l * prec));
e1bd7296 4276 gimple *new_stmt
b731b390 4277 = gimple_build_assign (make_ssa_name (vectype), t);
e1bd7296
RS
4278 new_stmt_info
4279 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4280
0136f8f0 4281 if (j == 0 && l == 0)
e1bd7296
RS
4282 STMT_VINFO_VEC_STMT (stmt_info)
4283 = *vec_stmt = new_stmt_info;
0136f8f0 4284 else
e1bd7296 4285 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
0136f8f0 4286
e1bd7296 4287 prev_stmt_info = new_stmt_info;
0136f8f0
AH
4288 }
4289
4290 if (ratype)
3ba4ff41 4291 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4292 continue;
4293 }
cf1b2ba4 4294 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4295 {
cf1b2ba4
RS
4296 unsigned int k = (simd_clone_subparts (vectype)
4297 / simd_clone_subparts (rtype));
0136f8f0
AH
4298 gcc_assert ((k & (k - 1)) == 0);
4299 if ((j & (k - 1)) == 0)
4300 vec_alloc (ret_ctor_elts, k);
4301 if (ratype)
4302 {
cf1b2ba4 4303 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4304 for (m = 0; m < o; m++)
4305 {
4306 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4307 size_int (m), NULL_TREE, NULL_TREE);
e1bd7296 4308 gimple *new_stmt
b731b390 4309 = gimple_build_assign (make_ssa_name (rtype), tem);
e1bd7296
RS
4310 new_stmt_info
4311 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0136f8f0
AH
4312 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4313 gimple_assign_lhs (new_stmt));
4314 }
3ba4ff41 4315 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4316 }
4317 else
4318 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4319 if ((j & (k - 1)) != k - 1)
4320 continue;
4321 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
e1bd7296 4322 gimple *new_stmt
b731b390 4323 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
e1bd7296
RS
4324 new_stmt_info
4325 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0136f8f0
AH
4326
4327 if ((unsigned) j == k - 1)
e1bd7296 4328 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
0136f8f0 4329 else
e1bd7296 4330 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
0136f8f0 4331
e1bd7296 4332 prev_stmt_info = new_stmt_info;
0136f8f0
AH
4333 continue;
4334 }
4335 else if (ratype)
4336 {
4337 tree t = build_fold_addr_expr (new_temp);
4338 t = build2 (MEM_REF, vectype, t,
4339 build_int_cst (TREE_TYPE (t), 0));
e1bd7296 4340 gimple *new_stmt
b731b390 4341 = gimple_build_assign (make_ssa_name (vec_dest), t);
e1bd7296
RS
4342 new_stmt_info
4343 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3ba4ff41 4344 vect_clobber_variable (stmt, gsi, new_temp);
0136f8f0
AH
4345 }
4346 }
4347
4348 if (j == 0)
e1bd7296 4349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
0136f8f0 4350 else
e1bd7296 4351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
0136f8f0 4352
e1bd7296 4353 prev_stmt_info = new_stmt_info;
0136f8f0
AH
4354 }
4355
4356 vargs.release ();
4357
4358 /* The call in STMT might prevent it from being removed in dce.
4359 We however cannot remove it here, due to the way the ssa name
4360 it defines is mapped to the new definition. So just replace
4361 rhs of the statement with something harmless. */
4362
4363 if (slp_node)
4364 return true;
4365
e1bd7296 4366 gimple *new_stmt;
0136f8f0
AH
4367 if (scalar_dest)
4368 {
4369 type = TREE_TYPE (scalar_dest);
4370 if (is_pattern_stmt_p (stmt_info))
10681ce8 4371 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
0136f8f0
AH
4372 else
4373 lhs = gimple_call_lhs (stmt);
4374 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4375 }
4376 else
4377 new_stmt = gimple_build_nop ();
4378 set_vinfo_for_stmt (new_stmt, stmt_info);
4379 set_vinfo_for_stmt (stmt, NULL);
4380 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4381 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4382 unlink_stmt_vdef (stmt);
4383
4384 return true;
4385}
4386
4387
ebfd146a
IR
4388/* Function vect_gen_widened_results_half
4389
4390 Create a vector stmt whose code, type, number of arguments, and result
b8698a0f 4391 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
ff802fa1 4392 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
ebfd146a
IR
4393 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4394 needs to be created (DECL is a function-decl of a target-builtin).
4395 STMT is the original scalar stmt that we are vectorizing. */
4396
355fe088 4397static gimple *
ebfd146a
IR
4398vect_gen_widened_results_half (enum tree_code code,
4399 tree decl,
4400 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4401 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4402 gimple *stmt)
b8698a0f 4403{
355fe088 4404 gimple *new_stmt;
b8698a0f
L
4405 tree new_temp;
4406
4407 /* Generate half of the widened result: */
4408 if (code == CALL_EXPR)
4409 {
4410 /* Target specific support */
ebfd146a
IR
4411 if (op_type == binary_op)
4412 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4413 else
4414 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4415 new_temp = make_ssa_name (vec_dest, new_stmt);
4416 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4417 }
4418 else
ebfd146a 4419 {
b8698a0f
L
4420 /* Generic support */
4421 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4422 if (op_type != binary_op)
4423 vec_oprnd1 = NULL;
0d0e4a03 4424 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4425 new_temp = make_ssa_name (vec_dest, new_stmt);
4426 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4427 }
ebfd146a
IR
4428 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4429
ebfd146a
IR
4430 return new_stmt;
4431}
4432
4a00c761
JJ
4433
4434/* Get vectorized definitions for loop-based vectorization. For the first
4435 operand we call vect_get_vec_def_for_operand() (with OPRND containing a
4436 scalar operand), and for the rest we get a copy with
4437 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4438 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4439 The vectors are collected into VEC_OPRNDS. */
4440
4441static void
355fe088 4442vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4443 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4444{
4445 tree vec_oprnd;
4446
4447 /* Get first vector operand. */
4448 /* All the vector operands except the very first one (that is scalar oprnd)
4449 are stmt copies. */
4450 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4451 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4452 else
4453 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4454
9771b263 4455 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4456
4457 /* Get second vector operand. */
4458 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4459 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4460
4461 *oprnd = vec_oprnd;
4462
4463 /* For conversion in multiple steps, continue to get operands
4464 recursively. */
4465 if (multi_step_cvt)
4466 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4467}
4468
4469
4470/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4471 For multi-step conversions store the resulting vectors and call the function
4472 recursively. */
4473
4474static void
9771b263 4475vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4476 int multi_step_cvt, gimple *stmt,
9771b263 4477 vec<tree> vec_dsts,
4a00c761
JJ
4478 gimple_stmt_iterator *gsi,
4479 slp_tree slp_node, enum tree_code code,
4480 stmt_vec_info *prev_stmt_info)
4481{
4482 unsigned int i;
4483 tree vop0, vop1, new_tmp, vec_dest;
4a00c761
JJ
4484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4485
9771b263 4486 vec_dest = vec_dsts.pop ();
4a00c761 4487
9771b263 4488 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4489 {
4490 /* Create demotion operation. */
9771b263
DN
4491 vop0 = (*vec_oprnds)[i];
4492 vop1 = (*vec_oprnds)[i + 1];
e1bd7296 4493 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4494 new_tmp = make_ssa_name (vec_dest, new_stmt);
4495 gimple_assign_set_lhs (new_stmt, new_tmp);
e1bd7296
RS
4496 stmt_vec_info new_stmt_info
4497 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
4498
4499 if (multi_step_cvt)
4500 /* Store the resulting vector for next recursive call. */
9771b263 4501 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4502 else
4503 {
4504 /* This is the last step of the conversion sequence. Store the
4505 vectors in SLP_NODE or in vector info of the scalar statement
4506 (or in STMT_VINFO_RELATED_STMT chain). */
4507 if (slp_node)
e1bd7296 4508 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4a00c761 4509 else
c689ce1e
RB
4510 {
4511 if (!*prev_stmt_info)
e1bd7296 4512 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
c689ce1e 4513 else
e1bd7296 4514 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4a00c761 4515
e1bd7296 4516 *prev_stmt_info = new_stmt_info;
c689ce1e 4517 }
4a00c761
JJ
4518 }
4519 }
4520
4521 /* For multi-step demotion operations we first generate demotion operations
4522 from the source type to the intermediate types, and then combine the
4523 results (stored in VEC_OPRNDS) in demotion operation to the destination
4524 type. */
4525 if (multi_step_cvt)
4526 {
4527 /* At each level of recursion we have half of the operands we had at the
4528 previous level. */
9771b263 4529 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4530 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4531 stmt, vec_dsts, gsi, slp_node,
4532 VEC_PACK_TRUNC_EXPR,
4533 prev_stmt_info);
4534 }
4535
9771b263 4536 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4537}
4538
4539
4540/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4541 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4542 the resulting vectors and call the function recursively. */
4543
4544static void
9771b263
DN
4545vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4546 vec<tree> *vec_oprnds1,
355fe088 4547 gimple *stmt, tree vec_dest,
4a00c761
JJ
4548 gimple_stmt_iterator *gsi,
4549 enum tree_code code1,
4550 enum tree_code code2, tree decl1,
4551 tree decl2, int op_type)
4552{
4553 int i;
4554 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4555 gimple *new_stmt1, *new_stmt2;
6e1aa848 4556 vec<tree> vec_tmp = vNULL;
4a00c761 4557
9771b263
DN
4558 vec_tmp.create (vec_oprnds0->length () * 2);
4559 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4560 {
4561 if (op_type == binary_op)
9771b263 4562 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4563 else
4564 vop1 = NULL_TREE;
4565
4566 /* Generate the two halves of promotion operation. */
4567 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4568 op_type, vec_dest, gsi, stmt);
4569 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4570 op_type, vec_dest, gsi, stmt);
4571 if (is_gimple_call (new_stmt1))
4572 {
4573 new_tmp1 = gimple_call_lhs (new_stmt1);
4574 new_tmp2 = gimple_call_lhs (new_stmt2);
4575 }
4576 else
4577 {
4578 new_tmp1 = gimple_assign_lhs (new_stmt1);
4579 new_tmp2 = gimple_assign_lhs (new_stmt2);
4580 }
4581
4582 /* Store the results for the next step. */
9771b263
DN
4583 vec_tmp.quick_push (new_tmp1);
4584 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4585 }
4586
689eaba3 4587 vec_oprnds0->release ();
4a00c761
JJ
4588 *vec_oprnds0 = vec_tmp;
4589}
4590
4591
b8698a0f
L
4592/* Check if STMT performs a conversion operation, that can be vectorized.
4593 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4594 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4595 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4596
4597static bool
355fe088 4598vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 4599 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 4600 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
4601{
4602 tree vec_dest;
4603 tree scalar_dest;
4a00c761 4604 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4605 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4606 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4607 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4608 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4609 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4610 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4611 tree new_temp;
ebfd146a 4612 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4613 int ndts = 2;
ebfd146a 4614 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4615 poly_uint64 nunits_in;
4616 poly_uint64 nunits_out;
ebfd146a 4617 tree vectype_out, vectype_in;
4a00c761
JJ
4618 int ncopies, i, j;
4619 tree lhs_type, rhs_type;
ebfd146a 4620 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4621 vec<tree> vec_oprnds0 = vNULL;
4622 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4623 tree vop0;
4a00c761 4624 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4625 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4626 int multi_step_cvt = 0;
6e1aa848 4627 vec<tree> interm_types = vNULL;
4a00c761
JJ
4628 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4629 int op_type;
4a00c761 4630 unsigned short fltsz;
ebfd146a
IR
4631
4632 /* Is STMT a vectorizable conversion? */
4633
4a00c761 4634 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4635 return false;
4636
66c16fd9
RB
4637 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4638 && ! vec_stmt)
ebfd146a
IR
4639 return false;
4640
4641 if (!is_gimple_assign (stmt))
4642 return false;
4643
4644 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4645 return false;
4646
4647 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4648 if (!CONVERT_EXPR_CODE_P (code)
4649 && code != FIX_TRUNC_EXPR
4650 && code != FLOAT_EXPR
4651 && code != WIDEN_MULT_EXPR
4652 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4653 return false;
4654
4a00c761
JJ
4655 op_type = TREE_CODE_LENGTH (code);
4656
ebfd146a 4657 /* Check types of lhs and rhs. */
b690cc0f 4658 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4659 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4660 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4661
ebfd146a
IR
4662 op0 = gimple_assign_rhs1 (stmt);
4663 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4664
4665 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4666 && !((INTEGRAL_TYPE_P (lhs_type)
4667 && INTEGRAL_TYPE_P (rhs_type))
4668 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4669 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4670 return false;
4671
e6f5c25d
IE
4672 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4673 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4674 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4675 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4676 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4677 {
73fbfcad 4678 if (dump_enabled_p ())
78c60e3d 4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4680 "type conversion to/from bit-precision unsupported."
4681 "\n");
4a00c761
JJ
4682 return false;
4683 }
4684
b690cc0f 4685 /* Check the operands of the operation. */
894dd753 4686 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
b690cc0f 4687 {
73fbfcad 4688 if (dump_enabled_p ())
78c60e3d 4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4690 "use not simple.\n");
b690cc0f
RG
4691 return false;
4692 }
4a00c761
JJ
4693 if (op_type == binary_op)
4694 {
4695 bool ok;
4696
4697 op1 = gimple_assign_rhs2 (stmt);
4698 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4699 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4700 OP1. */
4701 if (CONSTANT_CLASS_P (op0))
894dd753 4702 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4a00c761 4703 else
894dd753 4704 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4a00c761
JJ
4705
4706 if (!ok)
4707 {
73fbfcad 4708 if (dump_enabled_p ())
78c60e3d 4709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4710 "use not simple.\n");
4a00c761
JJ
4711 return false;
4712 }
4713 }
4714
b690cc0f
RG
4715 /* If op0 is an external or constant def, use a vector type of
4716 the same size as the output vector type. */
ebfd146a 4717 if (!vectype_in)
b690cc0f 4718 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4719 if (vec_stmt)
4720 gcc_assert (vectype_in);
4721 if (!vectype_in)
4722 {
73fbfcad 4723 if (dump_enabled_p ())
4a00c761 4724 {
78c60e3d
SS
4725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4726 "no vectype for scalar type ");
4727 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4728 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4729 }
7d8930a0
IR
4730
4731 return false;
4732 }
ebfd146a 4733
e6f5c25d
IE
4734 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4735 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4736 {
4737 if (dump_enabled_p ())
4738 {
4739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4740 "can't convert between boolean and non "
4741 "boolean vectors");
4742 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4743 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4744 }
4745
4746 return false;
4747 }
4748
b690cc0f
RG
4749 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4750 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4751 if (known_eq (nunits_out, nunits_in))
ebfd146a 4752 modifier = NONE;
062d5ccc
RS
4753 else if (multiple_p (nunits_out, nunits_in))
4754 modifier = NARROW;
ebfd146a 4755 else
062d5ccc
RS
4756 {
4757 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4758 modifier = WIDEN;
4759 }
ebfd146a 4760
ff802fa1
IR
4761 /* Multiple types in SLP are handled by creating the appropriate number of
4762 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4763 case of SLP. */
fce57248 4764 if (slp_node)
ebfd146a 4765 ncopies = 1;
4a00c761 4766 else if (modifier == NARROW)
e8f142e2 4767 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4768 else
e8f142e2 4769 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4770
ebfd146a
IR
4771 /* Sanity check: make sure that at least one copy of the vectorized stmt
4772 needs to be generated. */
4773 gcc_assert (ncopies >= 1);
4774
16d22000
RS
4775 bool found_mode = false;
4776 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4777 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4778 opt_scalar_mode rhs_mode_iter;
b397965c 4779
ebfd146a 4780 /* Supportable by target? */
4a00c761 4781 switch (modifier)
ebfd146a 4782 {
4a00c761
JJ
4783 case NONE:
4784 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4785 return false;
4786 if (supportable_convert_operation (code, vectype_out, vectype_in,
4787 &decl1, &code1))
4788 break;
4789 /* FALLTHRU */
4790 unsupported:
73fbfcad 4791 if (dump_enabled_p ())
78c60e3d 4792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4793 "conversion not supported by target.\n");
ebfd146a 4794 return false;
ebfd146a 4795
4a00c761
JJ
4796 case WIDEN:
4797 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4798 &code1, &code2, &multi_step_cvt,
4799 &interm_types))
4a00c761
JJ
4800 {
4801 /* Binary widening operation can only be supported directly by the
4802 architecture. */
4803 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4804 break;
4805 }
4806
4807 if (code != FLOAT_EXPR
b397965c 4808 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4809 goto unsupported;
4810
b397965c 4811 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4812 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4813 {
16d22000 4814 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4815 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4816 break;
4817
4a00c761
JJ
4818 cvt_type
4819 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4820 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4821 if (cvt_type == NULL_TREE)
4822 goto unsupported;
4823
4824 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4825 {
4826 if (!supportable_convert_operation (code, vectype_out,
4827 cvt_type, &decl1, &codecvt1))
4828 goto unsupported;
4829 }
4830 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4831 cvt_type, &codecvt1,
4832 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4833 &interm_types))
4834 continue;
4835 else
4836 gcc_assert (multi_step_cvt == 0);
4837
4838 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4839 vectype_in, &code1, &code2,
4840 &multi_step_cvt, &interm_types))
16d22000
RS
4841 {
4842 found_mode = true;
4843 break;
4844 }
4a00c761
JJ
4845 }
4846
16d22000 4847 if (!found_mode)
4a00c761
JJ
4848 goto unsupported;
4849
4850 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4851 codecvt2 = ERROR_MARK;
4852 else
4853 {
4854 multi_step_cvt++;
9771b263 4855 interm_types.safe_push (cvt_type);
4a00c761
JJ
4856 cvt_type = NULL_TREE;
4857 }
4858 break;
4859
4860 case NARROW:
4861 gcc_assert (op_type == unary_op);
4862 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4863 &code1, &multi_step_cvt,
4864 &interm_types))
4865 break;
4866
4867 if (code != FIX_TRUNC_EXPR
b397965c 4868 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4869 goto unsupported;
4870
4a00c761
JJ
4871 cvt_type
4872 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4873 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4874 if (cvt_type == NULL_TREE)
4875 goto unsupported;
4876 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4877 &decl1, &codecvt1))
4878 goto unsupported;
4879 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4880 &code1, &multi_step_cvt,
4881 &interm_types))
4882 break;
4883 goto unsupported;
4884
4885 default:
4886 gcc_unreachable ();
ebfd146a
IR
4887 }
4888
4889 if (!vec_stmt) /* transformation not required. */
4890 {
adac3a68 4891 DUMP_VECT_SCOPE ("vectorizable_conversion");
4a00c761 4892 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4893 {
4894 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
68435eb2
RB
4895 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4896 cost_vec);
8bd37302 4897 }
4a00c761
JJ
4898 else if (modifier == NARROW)
4899 {
4900 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
68435eb2
RB
4901 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4902 cost_vec);
4a00c761
JJ
4903 }
4904 else
4905 {
4906 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
68435eb2
RB
4907 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4908 cost_vec);
4a00c761 4909 }
9771b263 4910 interm_types.release ();
ebfd146a
IR
4911 return true;
4912 }
4913
67b8dbac 4914 /* Transform. */
73fbfcad 4915 if (dump_enabled_p ())
78c60e3d 4916 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4917 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4918
4a00c761
JJ
4919 if (op_type == binary_op)
4920 {
4921 if (CONSTANT_CLASS_P (op0))
4922 op0 = fold_convert (TREE_TYPE (op1), op0);
4923 else if (CONSTANT_CLASS_P (op1))
4924 op1 = fold_convert (TREE_TYPE (op0), op1);
4925 }
4926
4927 /* In case of multi-step conversion, we first generate conversion operations
4928 to the intermediate types, and then from those types to the final one.
4929 We create vector destinations for the intermediate type (TYPES) received
4930 from supportable_*_operation, and store them in the correct order
4931 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4932 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4933 vec_dest = vect_create_destination_var (scalar_dest,
4934 (cvt_type && modifier == WIDEN)
4935 ? cvt_type : vectype_out);
9771b263 4936 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4937
4938 if (multi_step_cvt)
4939 {
9771b263
DN
4940 for (i = interm_types.length () - 1;
4941 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4942 {
4943 vec_dest = vect_create_destination_var (scalar_dest,
4944 intermediate_type);
9771b263 4945 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4946 }
4947 }
ebfd146a 4948
4a00c761 4949 if (cvt_type)
82294ec1
JJ
4950 vec_dest = vect_create_destination_var (scalar_dest,
4951 modifier == WIDEN
4952 ? vectype_out : cvt_type);
4a00c761
JJ
4953
4954 if (!slp_node)
4955 {
30862efc 4956 if (modifier == WIDEN)
4a00c761 4957 {
c3284718 4958 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4959 if (op_type == binary_op)
9771b263 4960 vec_oprnds1.create (1);
4a00c761 4961 }
30862efc 4962 else if (modifier == NARROW)
9771b263
DN
4963 vec_oprnds0.create (
4964 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4965 }
4966 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4967 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4968
4a00c761 4969 last_oprnd = op0;
ebfd146a
IR
4970 prev_stmt_info = NULL;
4971 switch (modifier)
4972 {
4973 case NONE:
4974 for (j = 0; j < ncopies; j++)
4975 {
ebfd146a 4976 if (j == 0)
306b0c92 4977 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4978 else
4979 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4980
9771b263 4981 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761 4982 {
e1bd7296 4983 stmt_vec_info new_stmt_info;
4a00c761
JJ
4984 /* Arguments are ready, create the new vector stmt. */
4985 if (code1 == CALL_EXPR)
4986 {
e1bd7296 4987 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
4988 new_temp = make_ssa_name (vec_dest, new_stmt);
4989 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296
RS
4990 new_stmt_info
4991 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
4992 }
4993 else
4994 {
4995 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
e1bd7296
RS
4996 gassign *new_stmt
4997 = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4998 new_temp = make_ssa_name (vec_dest, new_stmt);
4999 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296
RS
5000 new_stmt_info
5001 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
5002 }
5003
4a00c761 5004 if (slp_node)
e1bd7296 5005 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
225ce44b
RB
5006 else
5007 {
5008 if (!prev_stmt_info)
e1bd7296
RS
5009 STMT_VINFO_VEC_STMT (stmt_info)
5010 = *vec_stmt = new_stmt_info;
225ce44b 5011 else
e1bd7296
RS
5012 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5013 prev_stmt_info = new_stmt_info;
225ce44b 5014 }
4a00c761 5015 }
ebfd146a
IR
5016 }
5017 break;
5018
5019 case WIDEN:
5020 /* In case the vectorization factor (VF) is bigger than the number
5021 of elements that we can fit in a vectype (nunits), we have to
5022 generate more than one vector stmt - i.e., we need to "unroll"
5023 the vector stmt by a factor VF/nunits. */
5024 for (j = 0; j < ncopies; j++)
5025 {
4a00c761 5026 /* Handle uses. */
ebfd146a 5027 if (j == 0)
4a00c761
JJ
5028 {
5029 if (slp_node)
5030 {
5031 if (code == WIDEN_LSHIFT_EXPR)
5032 {
5033 unsigned int k;
ebfd146a 5034
4a00c761
JJ
5035 vec_oprnd1 = op1;
5036 /* Store vec_oprnd1 for every vector stmt to be created
5037 for SLP_NODE. We check during the analysis that all
5038 the shift arguments are the same. */
5039 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5040 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5041
5042 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5043 slp_node);
4a00c761
JJ
5044 }
5045 else
5046 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 5047 &vec_oprnds1, slp_node);
4a00c761
JJ
5048 }
5049 else
5050 {
81c40241 5051 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 5052 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5053 if (op_type == binary_op)
5054 {
5055 if (code == WIDEN_LSHIFT_EXPR)
5056 vec_oprnd1 = op1;
5057 else
81c40241 5058 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 5059 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5060 }
5061 }
5062 }
ebfd146a 5063 else
4a00c761
JJ
5064 {
5065 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
5066 vec_oprnds0.truncate (0);
5067 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
5068 if (op_type == binary_op)
5069 {
5070 if (code == WIDEN_LSHIFT_EXPR)
5071 vec_oprnd1 = op1;
5072 else
5073 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5074 vec_oprnd1);
9771b263
DN
5075 vec_oprnds1.truncate (0);
5076 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
5077 }
5078 }
ebfd146a 5079
4a00c761
JJ
5080 /* Arguments are ready. Create the new vector stmts. */
5081 for (i = multi_step_cvt; i >= 0; i--)
5082 {
9771b263 5083 tree this_dest = vec_dsts[i];
4a00c761
JJ
5084 enum tree_code c1 = code1, c2 = code2;
5085 if (i == 0 && codecvt2 != ERROR_MARK)
5086 {
5087 c1 = codecvt1;
5088 c2 = codecvt2;
5089 }
5090 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5091 &vec_oprnds1,
5092 stmt, this_dest, gsi,
5093 c1, c2, decl1, decl2,
5094 op_type);
5095 }
5096
9771b263 5097 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761 5098 {
e1bd7296 5099 stmt_vec_info new_stmt_info;
4a00c761
JJ
5100 if (cvt_type)
5101 {
5102 if (codecvt1 == CALL_EXPR)
5103 {
e1bd7296 5104 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
5105 new_temp = make_ssa_name (vec_dest, new_stmt);
5106 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296
RS
5107 new_stmt_info
5108 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
5109 }
5110 else
5111 {
5112 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5113 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
5114 gassign *new_stmt
5115 = gimple_build_assign (new_temp, codecvt1, vop0);
5116 new_stmt_info
5117 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761 5118 }
4a00c761
JJ
5119 }
5120 else
e1bd7296 5121 new_stmt_info = vinfo->lookup_def (vop0);
4a00c761
JJ
5122
5123 if (slp_node)
e1bd7296 5124 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4a00c761 5125 else
c689ce1e
RB
5126 {
5127 if (!prev_stmt_info)
e1bd7296 5128 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
c689ce1e 5129 else
e1bd7296
RS
5130 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5131 prev_stmt_info = new_stmt_info;
c689ce1e 5132 }
4a00c761 5133 }
ebfd146a 5134 }
4a00c761
JJ
5135
5136 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
5137 break;
5138
5139 case NARROW:
5140 /* In case the vectorization factor (VF) is bigger than the number
5141 of elements that we can fit in a vectype (nunits), we have to
5142 generate more than one vector stmt - i.e., we need to "unroll"
5143 the vector stmt by a factor VF/nunits. */
5144 for (j = 0; j < ncopies; j++)
5145 {
5146 /* Handle uses. */
4a00c761
JJ
5147 if (slp_node)
5148 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5149 slp_node);
ebfd146a
IR
5150 else
5151 {
9771b263 5152 vec_oprnds0.truncate (0);
4a00c761
JJ
5153 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5154 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
5155 }
5156
4a00c761
JJ
5157 /* Arguments are ready. Create the new vector stmts. */
5158 if (cvt_type)
9771b263 5159 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5160 {
5161 if (codecvt1 == CALL_EXPR)
5162 {
e1bd7296 5163 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4a00c761
JJ
5164 new_temp = make_ssa_name (vec_dest, new_stmt);
5165 gimple_call_set_lhs (new_stmt, new_temp);
e1bd7296 5166 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761
JJ
5167 }
5168 else
5169 {
5170 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5171 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
5172 gassign *new_stmt
5173 = gimple_build_assign (new_temp, codecvt1, vop0);
5174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4a00c761 5175 }
ebfd146a 5176
9771b263 5177 vec_oprnds0[i] = new_temp;
4a00c761 5178 }
ebfd146a 5179
4a00c761
JJ
5180 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5181 stmt, vec_dsts, gsi,
5182 slp_node, code1,
5183 &prev_stmt_info);
ebfd146a
IR
5184 }
5185
5186 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5187 break;
ebfd146a
IR
5188 }
5189
9771b263
DN
5190 vec_oprnds0.release ();
5191 vec_oprnds1.release ();
9771b263 5192 interm_types.release ();
ebfd146a
IR
5193
5194 return true;
5195}
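/* Rough examples of the three modifiers handled by vectorizable_conversion
   above, assuming common 128-bit vector modes (V4SI, V4SF, V8HI):

     NONE:    float <- int     V4SF <- V4SI, one FLOAT_EXPR per copy;
     WIDEN:   int   <- short   each V8HI operand yields a lo and a hi V4SI
                               result, with extra intermediate types when
                               multi_step_cvt > 0;
     NARROW:  short <- int     pairs of V4SI operands are packed into one
                               V8HI result with VEC_PACK_TRUNC_EXPR.  */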
ff802fa1
IR
5196
5197
ebfd146a
IR
5198/* Function vectorizable_assignment.
5199
b8698a0f
L
5200 Check if STMT performs an assignment (copy) that can be vectorized.
5201 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5202 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5203 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5204
5205static bool
355fe088 5206vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 5207 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5208 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
5209{
5210 tree vec_dest;
5211 tree scalar_dest;
5212 tree op;
5213 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5214 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5215 tree new_temp;
4fc5ebf1
JG
5216 enum vect_def_type dt[1] = {vect_unknown_def_type};
5217 int ndts = 1;
ebfd146a 5218 int ncopies;
f18b55bd 5219 int i, j;
6e1aa848 5220 vec<tree> vec_oprnds = vNULL;
ebfd146a 5221 tree vop;
a70d6342 5222 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5223 vec_info *vinfo = stmt_info->vinfo;
f18b55bd 5224 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5225 enum tree_code code;
5226 tree vectype_in;
ebfd146a 5227
a70d6342 5228 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5229 return false;
5230
66c16fd9
RB
5231 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5232 && ! vec_stmt)
ebfd146a
IR
5233 return false;
5234
5235 /* Is vectorizable assignment? */
5236 if (!is_gimple_assign (stmt))
5237 return false;
5238
5239 scalar_dest = gimple_assign_lhs (stmt);
5240 if (TREE_CODE (scalar_dest) != SSA_NAME)
5241 return false;
5242
fde9c428 5243 code = gimple_assign_rhs_code (stmt);
ebfd146a 5244 if (gimple_assign_single_p (stmt)
fde9c428
RG
5245 || code == PAREN_EXPR
5246 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5247 op = gimple_assign_rhs1 (stmt);
5248 else
5249 return false;
5250
7b7ec6c5
RG
5251 if (code == VIEW_CONVERT_EXPR)
5252 op = TREE_OPERAND (op, 0);
5253
465c8c19 5254 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5255 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5256
5257 /* Multiple types in SLP are handled by creating the appropriate number of
5258 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5259 case of SLP. */
fce57248 5260 if (slp_node)
465c8c19
JJ
5261 ncopies = 1;
5262 else
e8f142e2 5263 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5264
5265 gcc_assert (ncopies >= 1);
5266
894dd753 5267 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
ebfd146a 5268 {
73fbfcad 5269 if (dump_enabled_p ())
78c60e3d 5270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5271 "use not simple.\n");
ebfd146a
IR
5272 return false;
5273 }
5274
fde9c428
RG
5275 /* We can handle NOP_EXPR conversions that do not change the number
5276 of elements or the vector size. */
7b7ec6c5
RG
5277 if ((CONVERT_EXPR_CODE_P (code)
5278 || code == VIEW_CONVERT_EXPR)
fde9c428 5279 && (!vectype_in
928686b1 5280 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5281 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5282 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5283 return false;
5284
7b7b1813
RG
5285 /* We do not handle bit-precision changes. */
5286 if ((CONVERT_EXPR_CODE_P (code)
5287 || code == VIEW_CONVERT_EXPR)
5288 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5289 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5290 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5291 /* But a conversion that does not change the bit-pattern is ok. */
5292 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5293 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5294 && TYPE_UNSIGNED (TREE_TYPE (op)))
5295 /* Conversion between boolean types of different sizes is
5296 a simple assignment in case their vectypes are same
5297 boolean vectors. */
5298 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5299 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5300 {
73fbfcad 5301 if (dump_enabled_p ())
78c60e3d
SS
5302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5303 "type conversion to/from bit-precision "
e645e942 5304 "unsupported.\n");
7b7b1813
RG
5305 return false;
5306 }
5307
ebfd146a
IR
5308 if (!vec_stmt) /* transformation not required. */
5309 {
5310 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
adac3a68 5311 DUMP_VECT_SCOPE ("vectorizable_assignment");
68435eb2 5312 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5313 return true;
5314 }
5315
67b8dbac 5316 /* Transform. */
73fbfcad 5317 if (dump_enabled_p ())
e645e942 5318 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5319
5320 /* Handle def. */
5321 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5322
5323 /* Handle use. */
f18b55bd 5324 for (j = 0; j < ncopies; j++)
ebfd146a 5325 {
f18b55bd
IR
5326 /* Handle uses. */
5327 if (j == 0)
306b0c92 5328 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5329 else
5330 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5331
5332 /* Arguments are ready. create the new vector stmt. */
e1bd7296 5333 stmt_vec_info new_stmt_info = NULL;
9771b263 5334 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5335 {
7b7ec6c5
RG
5336 if (CONVERT_EXPR_CODE_P (code)
5337 || code == VIEW_CONVERT_EXPR)
4a73490d 5338 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
e1bd7296 5339 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
f18b55bd
IR
5340 new_temp = make_ssa_name (vec_dest, new_stmt);
5341 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 5342 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
f18b55bd 5343 if (slp_node)
e1bd7296 5344 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
f18b55bd 5345 }
ebfd146a
IR
5346
5347 if (slp_node)
f18b55bd
IR
5348 continue;
5349
5350 if (j == 0)
e1bd7296 5351 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
f18b55bd 5352 else
e1bd7296 5353 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
f18b55bd 5354
e1bd7296 5355 prev_stmt_info = new_stmt_info;
f18b55bd 5356 }
b8698a0f 5357
9771b263 5358 vec_oprnds.release ();
ebfd146a
IR
5359 return true;
5360}
5361
9dc3f7de 5362
1107f3ae
IR
5363/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5364 either as shift by a scalar or by a vector. */
5365
5366bool
5367vect_supportable_shift (enum tree_code code, tree scalar_type)
5368{
5369
ef4bddc2 5370 machine_mode vec_mode;
1107f3ae
IR
5371 optab optab;
5372 int icode;
5373 tree vectype;
5374
5375 vectype = get_vectype_for_scalar_type (scalar_type);
5376 if (!vectype)
5377 return false;
5378
5379 optab = optab_for_tree_code (code, vectype, optab_scalar);
5380 if (!optab
5381 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5382 {
5383 optab = optab_for_tree_code (code, vectype, optab_vector);
5384 if (!optab
5385 || (optab_handler (optab, TYPE_MODE (vectype))
5386 == CODE_FOR_nothing))
5387 return false;
5388 }
5389
5390 vec_mode = TYPE_MODE (vectype);
5391 icode = (int) optab_handler (optab, vec_mode);
5392 if (icode == CODE_FOR_nothing)
5393 return false;
5394
5395 return true;
5396}
5397
5398
9dc3f7de
IR
5399/* Function vectorizable_shift.
5400
5401 Check if STMT performs a shift operation that can be vectorized.
5402 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5403 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5404 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5405
5406static bool
355fe088 5407vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 5408 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5409 stmt_vector_for_cost *cost_vec)
9dc3f7de
IR
5410{
5411 tree vec_dest;
5412 tree scalar_dest;
5413 tree op0, op1 = NULL;
5414 tree vec_oprnd1 = NULL_TREE;
5415 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5416 tree vectype;
5417 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5418 enum tree_code code;
ef4bddc2 5419 machine_mode vec_mode;
9dc3f7de
IR
5420 tree new_temp;
5421 optab optab;
5422 int icode;
ef4bddc2 5423 machine_mode optab_op2_mode;
9dc3f7de 5424 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5425 int ndts = 2;
9dc3f7de 5426 stmt_vec_info prev_stmt_info;
928686b1
RS
5427 poly_uint64 nunits_in;
5428 poly_uint64 nunits_out;
9dc3f7de 5429 tree vectype_out;
cede2577 5430 tree op1_vectype;
9dc3f7de
IR
5431 int ncopies;
5432 int j, i;
6e1aa848
DN
5433 vec<tree> vec_oprnds0 = vNULL;
5434 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5435 tree vop0, vop1;
5436 unsigned int k;
49eab32e 5437 bool scalar_shift_arg = true;
9dc3f7de 5438 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5439 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5440
5441 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5442 return false;
5443
66c16fd9
RB
5444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5445 && ! vec_stmt)
9dc3f7de
IR
5446 return false;
5447
5448 /* Is STMT a vectorizable binary/unary operation? */
5449 if (!is_gimple_assign (stmt))
5450 return false;
5451
5452 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5453 return false;
5454
5455 code = gimple_assign_rhs_code (stmt);
5456
5457 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5458 || code == RROTATE_EXPR))
5459 return false;
5460
5461 scalar_dest = gimple_assign_lhs (stmt);
5462 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5463 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5464 {
73fbfcad 5465 if (dump_enabled_p ())
78c60e3d 5466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5467 "bit-precision shifts not supported.\n");
7b7b1813
RG
5468 return false;
5469 }
9dc3f7de
IR
5470
5471 op0 = gimple_assign_rhs1 (stmt);
894dd753 5472 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
9dc3f7de 5473 {
73fbfcad 5474 if (dump_enabled_p ())
78c60e3d 5475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5476 "use not simple.\n");
9dc3f7de
IR
5477 return false;
5478 }
5479 /* If op0 is an external or constant def, use a vector type with
5480 the same size as the output vector type. */
5481 if (!vectype)
5482 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5483 if (vec_stmt)
5484 gcc_assert (vectype);
5485 if (!vectype)
5486 {
73fbfcad 5487 if (dump_enabled_p ())
78c60e3d 5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5489 "no vectype for scalar type\n");
9dc3f7de
IR
5490 return false;
5491 }
5492
5493 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5494 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5495 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5496 return false;
5497
5498 op1 = gimple_assign_rhs2 (stmt);
fef96d8e
RS
5499 stmt_vec_info op1_def_stmt_info;
5500 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5501 &op1_def_stmt_info))
9dc3f7de 5502 {
73fbfcad 5503 if (dump_enabled_p ())
78c60e3d 5504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5505 "use not simple.\n");
9dc3f7de
IR
5506 return false;
5507 }
5508
9dc3f7de
IR
5509 /* Multiple types in SLP are handled by creating the appropriate number of
5510 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5511 case of SLP. */
fce57248 5512 if (slp_node)
9dc3f7de
IR
5513 ncopies = 1;
5514 else
e8f142e2 5515 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5516
5517 gcc_assert (ncopies >= 1);
5518
5519 /* Determine whether the shift amount is a vector, or scalar. If the
5520 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5521
dbfa87aa
YR
5522 if ((dt[1] == vect_internal_def
5523 || dt[1] == vect_induction_def)
5524 && !slp_node)
49eab32e
JJ
5525 scalar_shift_arg = false;
5526 else if (dt[1] == vect_constant_def
5527 || dt[1] == vect_external_def
5528 || dt[1] == vect_internal_def)
5529 {
5530 /* In SLP, need to check whether the shift count is the same,
5531 in loops if it is a constant or invariant, it is always
5532 a scalar shift. */
5533 if (slp_node)
5534 {
b9787581
RS
5535 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5536 stmt_vec_info slpstmt_info;
49eab32e 5537
b9787581
RS
5538 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5539 {
5540 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5541 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5542 scalar_shift_arg = false;
5543 }
49eab32e 5544 }
60d393e8
RB
5545
5546 /* If the shift amount is computed by a pattern stmt we cannot
5547 use the scalar amount directly thus give up and use a vector
5548 shift. */
fef96d8e
RS
5549 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5550 scalar_shift_arg = false;
49eab32e
JJ
5551 }
5552 else
5553 {
73fbfcad 5554 if (dump_enabled_p ())
78c60e3d 5555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5556 "operand mode requires invariant argument.\n");
49eab32e
JJ
5557 return false;
5558 }
5559
9dc3f7de 5560 /* Vector shifted by vector. */
49eab32e 5561 if (!scalar_shift_arg)
9dc3f7de
IR
5562 {
5563 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5564 if (dump_enabled_p ())
78c60e3d 5565 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5566 "vector/vector shift/rotate found.\n");
78c60e3d 5567
aa948027
JJ
5568 if (!op1_vectype)
5569 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5570 if (op1_vectype == NULL_TREE
5571 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5572 {
73fbfcad 5573 if (dump_enabled_p ())
78c60e3d
SS
5574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5575 "unusable type for last operand in"
e645e942 5576 " vector/vector shift/rotate.\n");
cede2577
JJ
5577 return false;
5578 }
9dc3f7de
IR
5579 }
5580 /* See if the machine has a vector shifted by scalar insn and if not
5581 then see if it has a vector shifted by vector insn. */
49eab32e 5582 else
9dc3f7de
IR
5583 {
5584 optab = optab_for_tree_code (code, vectype, optab_scalar);
5585 if (optab
5586 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5587 {
73fbfcad 5588 if (dump_enabled_p ())
78c60e3d 5589 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5590 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5591 }
5592 else
5593 {
5594 optab = optab_for_tree_code (code, vectype, optab_vector);
5595 if (optab
5596 && (optab_handler (optab, TYPE_MODE (vectype))
5597 != CODE_FOR_nothing))
5598 {
49eab32e
JJ
5599 scalar_shift_arg = false;
5600
73fbfcad 5601 if (dump_enabled_p ())
78c60e3d 5602 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5603 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5604
5605 /* Unlike the other binary operators, shifts/rotates have
5606 the rhs being int, instead of the same type as the lhs,
5607 so make sure the scalar is the right type if we are
aa948027 5608 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5609 if (dt[1] == vect_constant_def)
5610 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5611 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5612 TREE_TYPE (op1)))
5613 {
5614 if (slp_node
5615 && TYPE_MODE (TREE_TYPE (vectype))
5616 != TYPE_MODE (TREE_TYPE (op1)))
5617 {
73fbfcad 5618 if (dump_enabled_p ())
78c60e3d
SS
5619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5620 "unusable type for last operand in"
e645e942 5621 " vector/vector shift/rotate.\n");
21c0a521 5622 return false;
aa948027
JJ
5623 }
5624 if (vec_stmt && !slp_node)
5625 {
5626 op1 = fold_convert (TREE_TYPE (vectype), op1);
5627 op1 = vect_init_vector (stmt, op1,
5628 TREE_TYPE (vectype), NULL);
5629 }
5630 }
9dc3f7de
IR
5631 }
5632 }
5633 }
9dc3f7de
IR
5634
5635 /* Supportable by target? */
5636 if (!optab)
5637 {
73fbfcad 5638 if (dump_enabled_p ())
78c60e3d 5639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5640 "no optab.\n");
9dc3f7de
IR
5641 return false;
5642 }
5643 vec_mode = TYPE_MODE (vectype);
5644 icode = (int) optab_handler (optab, vec_mode);
5645 if (icode == CODE_FOR_nothing)
5646 {
73fbfcad 5647 if (dump_enabled_p ())
78c60e3d 5648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5649 "op not supported by target.\n");
9dc3f7de 5650 /* Check only during analysis. */
cf098191 5651 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5652 || (!vec_stmt
5653 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5654 return false;
73fbfcad 5655 if (dump_enabled_p ())
e645e942
TJ
5656 dump_printf_loc (MSG_NOTE, vect_location,
5657 "proceeding using word mode.\n");
9dc3f7de
IR
5658 }
5659
5660 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5661 if (!vec_stmt
5662 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5663 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5664 {
73fbfcad 5665 if (dump_enabled_p ())
78c60e3d 5666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5667 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5668 return false;
5669 }
5670
5671 if (!vec_stmt) /* transformation not required. */
5672 {
5673 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
adac3a68 5674 DUMP_VECT_SCOPE ("vectorizable_shift");
68435eb2 5675 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
9dc3f7de
IR
5676 return true;
5677 }
5678
67b8dbac 5679 /* Transform. */
9dc3f7de 5680
73fbfcad 5681 if (dump_enabled_p ())
78c60e3d 5682 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5683 "transform binary/unary operation.\n");
9dc3f7de
IR
5684
5685 /* Handle def. */
5686 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5687
9dc3f7de
IR
5688 prev_stmt_info = NULL;
5689 for (j = 0; j < ncopies; j++)
5690 {
5691 /* Handle uses. */
5692 if (j == 0)
5693 {
5694 if (scalar_shift_arg)
5695 {
5696 /* Vector shl and shr insn patterns can be defined with scalar
5697 operand 2 (shift operand). In this case, use constant or loop
5698 invariant op1 directly, without extending it to vector mode
5699 first. */
5700 optab_op2_mode = insn_data[icode].operand[2].mode;
5701 if (!VECTOR_MODE_P (optab_op2_mode))
5702 {
73fbfcad 5703 if (dump_enabled_p ())
78c60e3d 5704 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5705 "operand 1 using scalar mode.\n");
9dc3f7de 5706 vec_oprnd1 = op1;
8930f723 5707 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5708 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5709 if (slp_node)
5710 {
5711 /* Store vec_oprnd1 for every vector stmt to be created
5712 for SLP_NODE. We check during the analysis that all
5713 the shift arguments are the same.
5714 TODO: Allow different constants for different vector
5715 stmts generated for an SLP instance. */
5716 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5717 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5718 }
5719 }
5720 }
5721
5722 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5723 (a special case for certain kind of vector shifts); otherwise,
5724 operand 1 should be of a vector type (the usual case). */
5725 if (vec_oprnd1)
5726 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5727 slp_node);
9dc3f7de
IR
5728 else
5729 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5730 slp_node);
9dc3f7de
IR
5731 }
5732 else
5733 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5734
5735 /* Arguments are ready. Create the new vector stmt. */
e1bd7296 5736 stmt_vec_info new_stmt_info = NULL;
9771b263 5737 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5738 {
9771b263 5739 vop1 = vec_oprnds1[i];
e1bd7296 5740 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5741 new_temp = make_ssa_name (vec_dest, new_stmt);
5742 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 5743 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
9dc3f7de 5744 if (slp_node)
e1bd7296 5745 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9dc3f7de
IR
5746 }
5747
5748 if (slp_node)
5749 continue;
5750
5751 if (j == 0)
e1bd7296 5752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9dc3f7de 5753 else
e1bd7296
RS
5754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5755 prev_stmt_info = new_stmt_info;
9dc3f7de
IR
5756 }
5757
9771b263
DN
5758 vec_oprnds0.release ();
5759 vec_oprnds1.release ();
9dc3f7de
IR
5760
5761 return true;
5762}
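/* Illustration of the shift-amount handling above (hypothetical statements):
   for "x[i] << 3" the amount is invariant, so the vector/scalar optab is
   preferred and the constant is used directly as operand 2; for
   "x[i] << y[i]" the amount is itself vectorized, so scalar_shift_arg is
   false and the vector/vector optab must be available.  */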
5763
5764
ebfd146a
IR
5765/* Function vectorizable_operation.
5766
16949072
RG
5767 Check if STMT performs a binary, unary or ternary operation that can
5768 be vectorized.
b8698a0f 5769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5770 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5772
5773static bool
355fe088 5774vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 5775 stmt_vec_info *vec_stmt, slp_tree slp_node,
68435eb2 5776 stmt_vector_for_cost *cost_vec)
ebfd146a 5777{
00f07b86 5778 tree vec_dest;
ebfd146a 5779 tree scalar_dest;
16949072 5780 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5781 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5782 tree vectype;
ebfd146a 5783 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5784 enum tree_code code, orig_code;
ef4bddc2 5785 machine_mode vec_mode;
ebfd146a
IR
5786 tree new_temp;
5787 int op_type;
00f07b86 5788 optab optab;
523ba738 5789 bool target_support_p;
16949072
RG
5790 enum vect_def_type dt[3]
5791 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5792 int ndts = 3;
ebfd146a 5793 stmt_vec_info prev_stmt_info;
928686b1
RS
5794 poly_uint64 nunits_in;
5795 poly_uint64 nunits_out;
ebfd146a
IR
5796 tree vectype_out;
5797 int ncopies;
5798 int j, i;
6e1aa848
DN
5799 vec<tree> vec_oprnds0 = vNULL;
5800 vec<tree> vec_oprnds1 = vNULL;
5801 vec<tree> vec_oprnds2 = vNULL;
16949072 5802 tree vop0, vop1, vop2;
a70d6342 5803 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5804 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5805
a70d6342 5806 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5807 return false;
5808
66c16fd9
RB
5809 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5810 && ! vec_stmt)
ebfd146a
IR
5811 return false;
5812
5813 /* Is STMT a vectorizable binary/unary operation? */
5814 if (!is_gimple_assign (stmt))
5815 return false;
5816
5817 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5818 return false;
5819
0eb952ea 5820 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5821
1af4ebf5
MG
5822 /* For pointer addition and subtraction, we should use the normal
5823 plus and minus for the vector operation. */
ebfd146a
IR
5824 if (code == POINTER_PLUS_EXPR)
5825 code = PLUS_EXPR;
1af4ebf5
MG
5826 if (code == POINTER_DIFF_EXPR)
5827 code = MINUS_EXPR;
ebfd146a
IR
5828
5829 /* Support only unary or binary operations. */
5830 op_type = TREE_CODE_LENGTH (code);
16949072 5831 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5832 {
73fbfcad 5833 if (dump_enabled_p ())
78c60e3d 5834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5835 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5836 op_type);
ebfd146a
IR
5837 return false;
5838 }
5839
b690cc0f
RG
5840 scalar_dest = gimple_assign_lhs (stmt);
5841 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5842
7b7b1813
RG
5843 /* Most operations cannot handle bit-precision types without extra
5844 truncations. */
045c1278 5845 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5846 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5847 /* Exceptions are bitwise binary operations. */
5848 && code != BIT_IOR_EXPR
5849 && code != BIT_XOR_EXPR
5850 && code != BIT_AND_EXPR)
5851 {
73fbfcad 5852 if (dump_enabled_p ())
78c60e3d 5853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5854 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5855 return false;
5856 }
5857
ebfd146a 5858 op0 = gimple_assign_rhs1 (stmt);
894dd753 5859 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
ebfd146a 5860 {
73fbfcad 5861 if (dump_enabled_p ())
78c60e3d 5862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5863 "use not simple.\n");
ebfd146a
IR
5864 return false;
5865 }
b690cc0f
RG
5866 /* If op0 is an external or constant def, use a vector type with
5867 the same size as the output vector type. */
5868 if (!vectype)
b036c6c5
IE
5869 {
5870 /* For boolean type we cannot determine vectype by
5871 invariant value (don't know whether it is a vector
5872 of booleans or vector of integers). We use output
5873 vectype because operations on boolean don't change
5874 type. */
2568d8a1 5875 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5876 {
2568d8a1 5877 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5878 {
5879 if (dump_enabled_p ())
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "not supported operation on bool value.\n");
5882 return false;
5883 }
5884 vectype = vectype_out;
5885 }
5886 else
5887 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5888 }
7d8930a0
IR
5889 if (vec_stmt)
5890 gcc_assert (vectype);
5891 if (!vectype)
5892 {
73fbfcad 5893 if (dump_enabled_p ())
7d8930a0 5894 {
78c60e3d
SS
5895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5896 "no vectype for scalar type ");
5897 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5898 TREE_TYPE (op0));
e645e942 5899 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5900 }
5901
5902 return false;
5903 }
b690cc0f
RG
5904
5905 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5906 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5907 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5908 return false;
ebfd146a 5909
16949072 5910 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5911 {
5912 op1 = gimple_assign_rhs2 (stmt);
894dd753 5913 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
ebfd146a 5914 {
73fbfcad 5915 if (dump_enabled_p ())
78c60e3d 5916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5917 "use not simple.\n");
ebfd146a
IR
5918 return false;
5919 }
5920 }
16949072
RG
5921 if (op_type == ternary_op)
5922 {
5923 op2 = gimple_assign_rhs3 (stmt);
894dd753 5924 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
16949072 5925 {
73fbfcad 5926 if (dump_enabled_p ())
78c60e3d 5927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5928 "use not simple.\n");
16949072
RG
5929 return false;
5930 }
5931 }
ebfd146a 5932
b690cc0f 5933 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5934 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5935 case of SLP. */
fce57248 5936 if (slp_node)
b690cc0f
RG
5937 ncopies = 1;
5938 else
e8f142e2 5939 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5940
5941 gcc_assert (ncopies >= 1);
5942
9dc3f7de 5943 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5944 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5945 || code == RROTATE_EXPR)
9dc3f7de 5946 return false;
ebfd146a 5947
ebfd146a 5948 /* Supportable by target? */
00f07b86
RH
5949
5950 vec_mode = TYPE_MODE (vectype);
5951 if (code == MULT_HIGHPART_EXPR)
523ba738 5952 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5953 else
5954 {
5955 optab = optab_for_tree_code (code, vectype, optab_default);
5956 if (!optab)
5deb57cb 5957 {
73fbfcad 5958 if (dump_enabled_p ())
78c60e3d 5959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5960 "no optab.\n");
00f07b86 5961 return false;
5deb57cb 5962 }
523ba738
RS
5963 target_support_p = (optab_handler (optab, vec_mode)
5964 != CODE_FOR_nothing);
5deb57cb
JJ
5965 }
5966
523ba738 5967 if (!target_support_p)
ebfd146a 5968 {
73fbfcad 5969 if (dump_enabled_p ())
78c60e3d 5970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5971 "op not supported by target.\n");
ebfd146a 5972 /* Check only during analysis. */
cf098191 5973 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5974 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5975 return false;
73fbfcad 5976 if (dump_enabled_p ())
e645e942
TJ
5977 dump_printf_loc (MSG_NOTE, vect_location,
5978 "proceeding using word mode.\n");
383d9c83
IR
5979 }
5980
4a00c761 5981 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5982 if (!VECTOR_MODE_P (vec_mode)
5983 && !vec_stmt
ca09abcb 5984 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5985 {
73fbfcad 5986 if (dump_enabled_p ())
78c60e3d 5987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5988 "not worthwhile without SIMD support.\n");
e34842c6 5989 return false;
7d8930a0 5990 }
ebfd146a 5991
ebfd146a
IR
5992 if (!vec_stmt) /* transformation not required. */
5993 {
4a00c761 5994 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
adac3a68 5995 DUMP_VECT_SCOPE ("vectorizable_operation");
68435eb2 5996 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
ebfd146a
IR
5997 return true;
5998 }
5999
67b8dbac 6000 /* Transform. */
ebfd146a 6001
73fbfcad 6002 if (dump_enabled_p ())
78c60e3d 6003 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6004 "transform binary/unary operation.\n");
383d9c83 6005
0eb952ea
JJ
6006 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6007 vectors with unsigned elements, but the result is signed. So, we
6008 need to compute the MINUS_EXPR into vectype temporary and
6009 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6010 tree vec_cvt_dest = NULL_TREE;
6011 if (orig_code == POINTER_DIFF_EXPR)
7b76867b
RB
6012 {
6013 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6014 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6015 }
6016 /* Handle def. */
6017 else
6018 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
0eb952ea 6019
ebfd146a
IR
6020 /* In case the vectorization factor (VF) is bigger than the number
6021 of elements that we can fit in a vectype (nunits), we have to generate
6022 more than one vector stmt - i.e., we need to "unroll" the
4a00c761
JJ
6023 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6024 from one copy of the vector stmt to the next, in the field
6025 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6026 stages to find the correct vector defs to be used when vectorizing
6027 stmts that use the defs of the current stmt. The example below
6028 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6029 we need to create 4 vectorized stmts):
6030
6031 before vectorization:
6032 RELATED_STMT VEC_STMT
6033 S1: x = memref - -
6034 S2: z = x + 1 - -
6035
6036 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6037 there):
6038 RELATED_STMT VEC_STMT
6039 VS1_0: vx0 = memref0 VS1_1 -
6040 VS1_1: vx1 = memref1 VS1_2 -
6041 VS1_2: vx2 = memref2 VS1_3 -
6042 VS1_3: vx3 = memref3 - -
6043 S1: x = load - VS1_0
6044 S2: z = x + 1 - -
6045
6046 step2: vectorize stmt S2 (done here):
6047 To vectorize stmt S2 we first need to find the relevant vector
6048 def for the first operand 'x'. This is, as usual, obtained from
6049 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6050 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6051 relevant vector def 'vx0'. Having found 'vx0' we can generate
6052 the vector stmt VS2_0, and as usual, record it in the
6053 STMT_VINFO_VEC_STMT of stmt S2.
6054 When creating the second copy (VS2_1), we obtain the relevant vector
6055 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6056 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6057 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6058 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6059 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6060 chain of stmts and pointers:
6061 RELATED_STMT VEC_STMT
6062 VS1_0: vx0 = memref0 VS1_1 -
6063 VS1_1: vx1 = memref1 VS1_2 -
6064 VS1_2: vx2 = memref2 VS1_3 -
6065 VS1_3: vx3 = memref3 - -
6066 S1: x = load - VS1_0
6067 VS2_0: vz0 = vx0 + v1 VS2_1 -
6068 VS2_1: vz1 = vx1 + v1 VS2_2 -
6069 VS2_2: vz2 = vx2 + v1 VS2_3 -
6070 VS2_3: vz3 = vx3 + v1 - -
6071 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
6072
6073 prev_stmt_info = NULL;
6074 for (j = 0; j < ncopies; j++)
6075 {
6076 /* Handle uses. */
6077 if (j == 0)
4a00c761 6078 {
d6476f90 6079 if (op_type == binary_op)
4a00c761 6080 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 6081 slp_node);
d6476f90
RB
6082 else if (op_type == ternary_op)
6083 {
6084 if (slp_node)
6085 {
6086 auto_vec<tree> ops(3);
6087 ops.quick_push (op0);
6088 ops.quick_push (op1);
6089 ops.quick_push (op2);
6090 auto_vec<vec<tree> > vec_defs(3);
6091 vect_get_slp_defs (ops, slp_node, &vec_defs);
6092 vec_oprnds0 = vec_defs[0];
6093 vec_oprnds1 = vec_defs[1];
6094 vec_oprnds2 = vec_defs[2];
6095 }
6096 else
6097 {
6098 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6099 NULL);
6100 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6101 NULL);
6102 }
6103 }
4a00c761
JJ
6104 else
6105 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 6106 slp_node);
4a00c761 6107 }
ebfd146a 6108 else
4a00c761
JJ
6109 {
6110 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6111 if (op_type == ternary_op)
6112 {
9771b263
DN
6113 tree vec_oprnd = vec_oprnds2.pop ();
6114 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6115 vec_oprnd));
4a00c761
JJ
6116 }
6117 }
6118
6119 /* Arguments are ready. Create the new vector stmt. */
e1bd7296 6120 stmt_vec_info new_stmt_info = NULL;
9771b263 6121 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 6122 {
4a00c761 6123 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 6124 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 6125 vop2 = ((op_type == ternary_op)
9771b263 6126 ? vec_oprnds2[i] : NULL_TREE);
e1bd7296
RS
6127 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6128 vop0, vop1, vop2);
4a00c761
JJ
6129 new_temp = make_ssa_name (vec_dest, new_stmt);
6130 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296 6131 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
6132 if (vec_cvt_dest)
6133 {
6134 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
e1bd7296
RS
6135 gassign *new_stmt
6136 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6137 new_temp);
0eb952ea
JJ
6138 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6139 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296
RS
6140 new_stmt_info
6141 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea 6142 }
4a00c761 6143 if (slp_node)
e1bd7296 6144 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
ebfd146a
IR
6145 }
6146
4a00c761
JJ
6147 if (slp_node)
6148 continue;
6149
6150 if (j == 0)
e1bd7296 6151 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4a00c761 6152 else
e1bd7296
RS
6153 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6154 prev_stmt_info = new_stmt_info;
ebfd146a
IR
6155 }
6156
9771b263
DN
6157 vec_oprnds0.release ();
6158 vec_oprnds1.release ();
6159 vec_oprnds2.release ();
ebfd146a 6160
ebfd146a
IR
6161 return true;
6162}
6163
f702e7d4 6164/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
6165
6166static void
f702e7d4 6167ensure_base_align (struct data_reference *dr)
c716e67f 6168{
ca823c85 6169 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
c716e67f
XDL
6170 return;
6171
52639a61 6172 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 6173 {
52639a61 6174 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 6175
f702e7d4
RS
6176 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6177
428f0c67 6178 if (decl_in_symtab_p (base_decl))
f702e7d4 6179 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6180 else
6181 {
f702e7d4 6182 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6183 DECL_USER_ALIGN (base_decl) = 1;
6184 }
52639a61 6185 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6186 }
6187}
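/* For example, if DR_TARGET_ALIGNMENT (dr) is 16 bytes but the base decl
   was only 8-byte aligned, the code above raises DECL_ALIGN to 128 bits
   (or asks the symbol table to do it for global symbols), so that later
   code may rely on aligned vector accesses based on DR.  */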
6188
ebfd146a 6189
44fc7854
BE
6190/* Function get_group_alias_ptr_type.
6191
6192 Return the alias type for the group starting at FIRST_STMT. */
6193
6194static tree
6195get_group_alias_ptr_type (gimple *first_stmt)
6196{
6197 struct data_reference *first_dr, *next_dr;
44fc7854
BE
6198
6199 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
bffb8014
RS
6200 stmt_vec_info next_stmt_info
6201 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6202 while (next_stmt_info)
44fc7854 6203 {
bffb8014 6204 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
44fc7854
BE
6205 if (get_alias_set (DR_REF (first_dr))
6206 != get_alias_set (DR_REF (next_dr)))
6207 {
6208 if (dump_enabled_p ())
6209 dump_printf_loc (MSG_NOTE, vect_location,
6210 "conflicting alias set types.\n");
6211 return ptr_type_node;
6212 }
bffb8014 6213 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
44fc7854
BE
6214 }
6215 return reference_alias_ptr_type (DR_REF (first_dr));
6216}
6217
6218
ebfd146a
IR
6219/* Function vectorizable_store.
6220
b8698a0f
L
6221 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6222 can be vectorized.
6223 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6224 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6225 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6226
6227static bool
1eede195
RS
6228vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi,
6229 stmt_vec_info *vec_stmt, slp_tree slp_node,
6230 stmt_vector_for_cost *cost_vec)
ebfd146a 6231{
ebfd146a
IR
6232 tree data_ref;
6233 tree op;
6234 tree vec_oprnd = NULL_TREE;
6235 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6236 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6237 tree elem_type;
ebfd146a 6238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6239 struct loop *loop = NULL;
ef4bddc2 6240 machine_mode vec_mode;
ebfd146a
IR
6241 tree dummy;
6242 enum dr_alignment_support alignment_support_scheme;
929b4411
RS
6243 enum vect_def_type rhs_dt = vect_unknown_def_type;
6244 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6245 stmt_vec_info prev_stmt_info = NULL;
6246 tree dataref_ptr = NULL_TREE;
74bf76ed 6247 tree dataref_offset = NULL_TREE;
355fe088 6248 gimple *ptr_incr = NULL;
ebfd146a
IR
6249 int ncopies;
6250 int j;
bffb8014 6251 stmt_vec_info first_stmt_info;
2de001ee 6252 bool grouped_store;
ebfd146a 6253 unsigned int group_size, i;
6e1aa848
DN
6254 vec<tree> oprnds = vNULL;
6255 vec<tree> result_chain = vNULL;
ebfd146a 6256 bool inv_p;
09dfa495 6257 tree offset = NULL_TREE;
6e1aa848 6258 vec<tree> vec_oprnds = vNULL;
ebfd146a 6259 bool slp = (slp_node != NULL);
ebfd146a 6260 unsigned int vec_num;
a70d6342 6261 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6262 vec_info *vinfo = stmt_info->vinfo;
272c6793 6263 tree aggr_type;
134c85ca 6264 gather_scatter_info gs_info;
d9f21f6a 6265 poly_uint64 vf;
2de001ee 6266 vec_load_store_type vls_type;
44fc7854 6267 tree ref_type;
a70d6342 6268
a70d6342 6269 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6270 return false;
6271
66c16fd9
RB
6272 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6273 && ! vec_stmt)
ebfd146a
IR
6274 return false;
6275
6276 /* Is vectorizable store? */
6277
c3a8f964
RS
6278 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6279 if (is_gimple_assign (stmt))
6280 {
6281 tree scalar_dest = gimple_assign_lhs (stmt);
6282 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6283 && is_pattern_stmt_p (stmt_info))
6284 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6285 if (TREE_CODE (scalar_dest) != ARRAY_REF
6286 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6287 && TREE_CODE (scalar_dest) != INDIRECT_REF
6288 && TREE_CODE (scalar_dest) != COMPONENT_REF
6289 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6290 && TREE_CODE (scalar_dest) != REALPART_EXPR
6291 && TREE_CODE (scalar_dest) != MEM_REF)
6292 return false;
6293 }
6294 else
6295 {
6296 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6297 if (!call || !gimple_call_internal_p (call))
6298 return false;
6299
6300 internal_fn ifn = gimple_call_internal_fn (call);
6301 if (!internal_store_fn_p (ifn))
c3a8f964 6302 return false;
ebfd146a 6303
c3a8f964
RS
6304 if (slp_node != NULL)
6305 {
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6308 "SLP of masked stores not supported.\n");
6309 return false;
6310 }
6311
f307441a
RS
6312 int mask_index = internal_fn_mask_index (ifn);
6313 if (mask_index >= 0)
6314 {
6315 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6316 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6317 &mask_vectype))
f307441a
RS
6318 return false;
6319 }
c3a8f964
RS
6320 }
6321
6322 op = vect_get_store_rhs (stmt);
ebfd146a 6323
fce57248
RS
6324 /* Cannot have hybrid store SLP -- that would mean storing to the
6325 same location twice. */
6326 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6327
f4d09712 6328 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6329 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6330
6331 if (loop_vinfo)
b17dc4d4
RB
6332 {
6333 loop = LOOP_VINFO_LOOP (loop_vinfo);
6334 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6335 }
6336 else
6337 vf = 1;
465c8c19
JJ
6338
6339 /* Multiple types in SLP are handled by creating the appropriate number of
6340 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6341 case of SLP. */
fce57248 6342 if (slp)
465c8c19
JJ
6343 ncopies = 1;
6344 else
e8f142e2 6345 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6346
6347 gcc_assert (ncopies >= 1);
6348
6349 /* FORNOW. This restriction should be relaxed. */
6350 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6351 {
6352 if (dump_enabled_p ())
6353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6354 "multiple types in nested loop.\n");
6355 return false;
6356 }
6357
929b4411 6358 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
f4d09712
KY
6359 return false;
6360
272c6793 6361 elem_type = TREE_TYPE (vectype);
ebfd146a 6362 vec_mode = TYPE_MODE (vectype);
7b7b1813 6363
ebfd146a
IR
6364 if (!STMT_VINFO_DATA_REF (stmt_info))
6365 return false;
6366
2de001ee 6367 vect_memory_access_type memory_access_type;
7e11fc7f 6368 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
2de001ee
RS
6369 &memory_access_type, &gs_info))
6370 return false;
3bab6342 6371
c3a8f964
RS
6372 if (mask)
6373 {
7e11fc7f
RS
6374 if (memory_access_type == VMAT_CONTIGUOUS)
6375 {
6376 if (!VECTOR_MODE_P (vec_mode)
6377 || !can_vec_mask_load_store_p (vec_mode,
6378 TYPE_MODE (mask_vectype), false))
6379 return false;
6380 }
f307441a
RS
6381 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6382 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
c3a8f964
RS
6383 {
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6386 "unsupported access type for masked store.\n");
6387 return false;
6388 }
c3a8f964
RS
6389 }
6390 else
6391 {
6392 /* FORNOW. In some cases we can vectorize even if the data type is not
6393 supported (e.g. array initialization with 0). */
6394 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6395 return false;
6396 }
6397
f307441a 6398 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
b5ec4de7
RS
6399 && memory_access_type != VMAT_GATHER_SCATTER
6400 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7cfb4d93
RS
6401 if (grouped_store)
6402 {
bffb8014
RS
6403 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6404 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6405 group_size = DR_GROUP_SIZE (first_stmt_info);
7cfb4d93
RS
6406 }
6407 else
6408 {
bffb8014 6409 first_stmt_info = stmt_info;
7cfb4d93
RS
6410 first_dr = dr;
6411 group_size = vec_num = 1;
6412 }
6413
ebfd146a
IR
6414 if (!vec_stmt) /* transformation not required. */
6415 {
2de001ee 6416 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
6417
6418 if (loop_vinfo
6419 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6420 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
bfaa08b7 6421 memory_access_type, &gs_info);
7cfb4d93 6422
ebfd146a 6423 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
68435eb2
RB
6424 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6425 vls_type, slp_node, cost_vec);
ebfd146a
IR
6426 return true;
6427 }
2de001ee 6428 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
ebfd146a 6429
67b8dbac 6430 /* Transform. */
ebfd146a 6431
f702e7d4 6432 ensure_base_align (dr);
c716e67f 6433
f307441a 6434 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
3bab6342 6435 {
c3a8f964 6436 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
134c85ca 6437 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
3bab6342
AT
6438 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6439 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6440 edge pe = loop_preheader_edge (loop);
6441 gimple_seq seq;
6442 basic_block new_bb;
6443 enum { NARROW, NONE, WIDEN } modifier;
4d694b27
RS
6444 poly_uint64 scatter_off_nunits
6445 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
3bab6342 6446
4d694b27 6447 if (known_eq (nunits, scatter_off_nunits))
3bab6342 6448 modifier = NONE;
4d694b27 6449 else if (known_eq (nunits * 2, scatter_off_nunits))
3bab6342 6450 {
3bab6342
AT
6451 modifier = WIDEN;
6452
4d694b27
RS
6453 /* Currently gathers and scatters are only supported for
6454 fixed-length vectors. */
6455 unsigned int count = scatter_off_nunits.to_constant ();
6456 vec_perm_builder sel (count, count, 1);
6457 for (i = 0; i < (unsigned int) count; ++i)
6458 sel.quick_push (i | (count / 2));
3bab6342 6459
4d694b27 6460 vec_perm_indices indices (sel, 1, count);
e3342de4
RS
6461 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6462 indices);
3bab6342
AT
6463 gcc_assert (perm_mask != NULL_TREE);
6464 }
4d694b27 6465 else if (known_eq (nunits, scatter_off_nunits * 2))
3bab6342 6466 {
3bab6342
AT
6467 modifier = NARROW;
6468
4d694b27
RS
6469 /* Currently gathers and scatters are only supported for
6470 fixed-length vectors. */
6471 unsigned int count = nunits.to_constant ();
6472 vec_perm_builder sel (count, count, 1);
6473 for (i = 0; i < (unsigned int) count; ++i)
6474 sel.quick_push (i | (count / 2));
3bab6342 6475
4d694b27 6476 vec_perm_indices indices (sel, 2, count);
e3342de4 6477 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3bab6342
AT
6478 gcc_assert (perm_mask != NULL_TREE);
6479 ncopies *= 2;
6480 }
6481 else
6482 gcc_unreachable ();
6483
134c85ca 6484 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
3bab6342
AT
6485 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6486 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6487 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6488 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6489 scaletype = TREE_VALUE (arglist);
6490
6491 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6492 && TREE_CODE (rettype) == VOID_TYPE);
6493
134c85ca 6494 ptr = fold_convert (ptrtype, gs_info.base);
3bab6342
AT
6495 if (!is_gimple_min_invariant (ptr))
6496 {
6497 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6498 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6499 gcc_assert (!new_bb);
6500 }
6501
6502 /* Currently we support only unconditional scatter stores,
6503 so mask should be all ones. */
6504 mask = build_int_cst (masktype, -1);
6505 mask = vect_init_vector (stmt, mask, masktype, NULL);
6506
134c85ca 6507 scale = build_int_cst (scaletype, gs_info.scale);
3bab6342
AT
6508
6509 prev_stmt_info = NULL;
6510 for (j = 0; j < ncopies; ++j)
6511 {
6512 if (j == 0)
6513 {
6514 src = vec_oprnd1
c3a8f964 6515 = vect_get_vec_def_for_operand (op, stmt);
3bab6342 6516 op = vec_oprnd0
134c85ca 6517 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
3bab6342
AT
6518 }
6519 else if (modifier != NONE && (j & 1))
6520 {
6521 if (modifier == WIDEN)
6522 {
6523 src = vec_oprnd1
929b4411 6524 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342
AT
6525 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6526 stmt, gsi);
6527 }
6528 else if (modifier == NARROW)
6529 {
6530 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6531 stmt, gsi);
6532 op = vec_oprnd0
134c85ca
RS
6533 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6534 vec_oprnd0);
3bab6342
AT
6535 }
6536 else
6537 gcc_unreachable ();
6538 }
6539 else
6540 {
6541 src = vec_oprnd1
929b4411 6542 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
3bab6342 6543 op = vec_oprnd0
134c85ca
RS
6544 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6545 vec_oprnd0);
3bab6342
AT
6546 }
6547
6548 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6549 {
928686b1
RS
6550 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6551 TYPE_VECTOR_SUBPARTS (srctype)));
0e22bb5a 6552 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342 6553 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
e1bd7296
RS
6554 gassign *new_stmt
6555 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
3bab6342
AT
6556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6557 src = var;
6558 }
6559
6560 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6561 {
928686b1
RS
6562 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6563 TYPE_VECTOR_SUBPARTS (idxtype)));
0e22bb5a 6564 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342 6565 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
e1bd7296
RS
6566 gassign *new_stmt
6567 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3bab6342
AT
6568 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6569 op = var;
6570 }
6571
e1bd7296 6572 gcall *new_stmt
134c85ca 6573 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
e1bd7296
RS
6574 stmt_vec_info new_stmt_info
6575 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3bab6342 6576
dbe1b846 6577 if (prev_stmt_info == NULL_STMT_VEC_INFO)
e1bd7296 6578 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3bab6342 6579 else
e1bd7296
RS
6580 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6581 prev_stmt_info = new_stmt_info;
3bab6342
AT
6582 }
6583 return true;
6584 }
6585
f307441a 6586 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
bffb8014 6587 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
ebfd146a 6588
f307441a
RS
6589 if (grouped_store)
6590 {
ebfd146a 6591 /* FORNOW */
a70d6342 6592 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
6593
6594 /* We vectorize all the stmts of the interleaving group when we
6595 reach the last stmt in the group. */
bffb8014
RS
6596 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6597 < DR_GROUP_SIZE (first_stmt_info)
ebfd146a
IR
6598 && !slp)
6599 {
6600 *vec_stmt = NULL;
6601 return true;
6602 }
6603
6604 if (slp)
4b5caab7 6605 {
0d0293ac 6606 grouped_store = false;
4b5caab7
IR
6607 /* VEC_NUM is the number of vect stmts to be created for this
6608 group. */
6609 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
bffb8014
RS
6610 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6611 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6612 == first_stmt_info);
6613 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6614 op = vect_get_store_rhs (first_stmt_info);
4b5caab7 6615 }
ebfd146a 6616 else
4b5caab7
IR
6617 /* VEC_NUM is the number of vect stmts to be created for this
6618 group. */
ebfd146a 6619 vec_num = group_size;
44fc7854 6620
bffb8014 6621 ref_type = get_group_alias_ptr_type (first_stmt_info);
ebfd146a 6622 }
b8698a0f 6623 else
7cfb4d93 6624 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
b8698a0f 6625
73fbfcad 6626 if (dump_enabled_p ())
78c60e3d 6627 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6628 "transform store. ncopies = %d\n", ncopies);
ebfd146a 6629
2de001ee
RS
6630 if (memory_access_type == VMAT_ELEMENTWISE
6631 || memory_access_type == VMAT_STRIDED_SLP)
f2e2a985
MM
6632 {
6633 gimple_stmt_iterator incr_gsi;
6634 bool insert_after;
355fe088 6635 gimple *incr;
f2e2a985
MM
6636 tree offvar;
6637 tree ivstep;
6638 tree running_off;
f2e2a985
MM
6639 tree stride_base, stride_step, alias_off;
6640 tree vec_oprnd;
f502d50e 6641 unsigned int g;
4d694b27
RS
6642 /* Checked by get_load_store_type. */
6643 unsigned int const_nunits = nunits.to_constant ();
f2e2a985 6644
7cfb4d93 6645 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
f2e2a985
MM
6646 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6647
6648 stride_base
6649 = fold_build_pointer_plus
b210f45f 6650 (DR_BASE_ADDRESS (first_dr),
f2e2a985 6651 size_binop (PLUS_EXPR,
b210f45f 6652 convert_to_ptrofftype (DR_OFFSET (first_dr)),
44fc7854 6653 convert_to_ptrofftype (DR_INIT (first_dr))));
b210f45f 6654 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
f2e2a985
MM
6655
6656 /* For a store with loop-invariant (but other than power-of-2)
6657 stride (i.e. not a grouped access) like so:
6658
6659 for (i = 0; i < n; i += stride)
6660 array[i] = ...;
6661
6662 we generate a new induction variable and new stores from
6663 the components of the (vectorized) rhs:
6664
6665 for (j = 0; ; j += VF*stride)
6666 vectemp = ...;
6667 tmp1 = vectemp[0];
6668 array[j] = tmp1;
6669 tmp2 = vectemp[1];
6670 array[j + stride] = tmp2;
6671 ...
6672 */
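 /* Illustrative instance (a sketch, not part of the original sources):
    with a 4-element vector type and stride 3 this becomes

    for (j = 0; ; j += 4*3)
      vectemp = ...;
      array[j] = vectemp[0];
      array[j + 3] = vectemp[1];
      array[j + 6] = vectemp[2];
      array[j + 9] = vectemp[3];

    i.e. one element extraction and one scalar store per vector lane, with
    the running offset bumped by the scalar stride between stores and the
    induction variable bumped by VF*stride per vector iteration.  */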
6673
4d694b27 6674 unsigned nstores = const_nunits;
b17dc4d4 6675 unsigned lnel = 1;
cee62fee 6676 tree ltype = elem_type;
04199738 6677 tree lvectype = vectype;
cee62fee
MM
6678 if (slp)
6679 {
4d694b27
RS
6680 if (group_size < const_nunits
6681 && const_nunits % group_size == 0)
b17dc4d4 6682 {
4d694b27 6683 nstores = const_nunits / group_size;
b17dc4d4
RB
6684 lnel = group_size;
6685 ltype = build_vector_type (elem_type, group_size);
04199738
RB
6686 lvectype = vectype;
6687
6688 /* First check if vec_extract optab doesn't support extraction
6689 of vector elts directly. */
b397965c 6690 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
9da15d40
RS
6691 machine_mode vmode;
6692 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6693 || !VECTOR_MODE_P (vmode)
414fef4e 6694 || !targetm.vector_mode_supported_p (vmode)
04199738
RB
6695 || (convert_optab_handler (vec_extract_optab,
6696 TYPE_MODE (vectype), vmode)
6697 == CODE_FOR_nothing))
6698 {
6699 /* Try to avoid emitting an extract of vector elements
6700 by performing the extracts using an integer type of the
6701 same size, extracting from a vector of those and then
6702 re-interpreting it as the original vector type if
6703 supported. */
6704 unsigned lsize
6705 = group_size * GET_MODE_BITSIZE (elmode);
fffbab82 6706 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 6707 unsigned int lnunits = const_nunits / group_size;
04199738
RB
6708 /* If we can't construct such a vector fall back to
6709 element extracts from the original vector type and
6710 element size stores. */
4d694b27 6711 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 6712 && VECTOR_MODE_P (vmode)
414fef4e 6713 && targetm.vector_mode_supported_p (vmode)
04199738
RB
6714 && (convert_optab_handler (vec_extract_optab,
6715 vmode, elmode)
6716 != CODE_FOR_nothing))
6717 {
4d694b27 6718 nstores = lnunits;
04199738
RB
6719 lnel = group_size;
6720 ltype = build_nonstandard_integer_type (lsize, 1);
6721 lvectype = build_vector_type (ltype, nstores);
6722 }
6723 /* Else fall back to vector extraction anyway.
6724 Fewer stores are more important than avoiding spilling
6725 of the vector we extract from. Compared to the
6726 construction case in vectorizable_load no store-forwarding
6727 issue exists here for reasonable archs. */
6728 }
b17dc4d4 6729 }
4d694b27
RS
6730 else if (group_size >= const_nunits
6731 && group_size % const_nunits == 0)
b17dc4d4
RB
6732 {
6733 nstores = 1;
4d694b27 6734 lnel = const_nunits;
b17dc4d4 6735 ltype = vectype;
04199738 6736 lvectype = vectype;
b17dc4d4 6737 }
cee62fee
MM
6738 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6739 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6740 }
6741
f2e2a985
MM
6742 ivstep = stride_step;
6743 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 6744 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
6745
6746 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6747
b210f45f
RB
6748 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6749 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
f2e2a985
MM
6750 create_iv (stride_base, ivstep, NULL,
6751 loop, &incr_gsi, insert_after,
6752 &offvar, NULL);
6753 incr = gsi_stmt (incr_gsi);
4fbeb363 6754 loop_vinfo->add_stmt (incr);
f2e2a985 6755
b210f45f 6756 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
f2e2a985
MM
6757
6758 prev_stmt_info = NULL;
44fc7854 6759 alias_off = build_int_cst (ref_type, 0);
bffb8014 6760 stmt_vec_info next_stmt_info = first_stmt_info;
f502d50e 6761 for (g = 0; g < group_size; g++)
f2e2a985 6762 {
f502d50e
MM
6763 running_off = offvar;
6764 if (g)
f2e2a985 6765 {
f502d50e
MM
6766 tree size = TYPE_SIZE_UNIT (ltype);
6767 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 6768 size);
f502d50e 6769 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 6770 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 6771 running_off, pos);
f2e2a985 6772 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 6773 running_off = newoff;
f502d50e 6774 }
b17dc4d4
RB
6775 unsigned int group_el = 0;
6776 unsigned HOST_WIDE_INT
6777 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
6778 for (j = 0; j < ncopies; j++)
6779 {
c3a8f964 6780 /* We've set op and dt above, from vect_get_store_rhs,
bffb8014 6781 and first_stmt_info == stmt_info. */
f502d50e
MM
6782 if (j == 0)
6783 {
6784 if (slp)
6785 {
6786 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
306b0c92 6787 slp_node);
f502d50e
MM
6788 vec_oprnd = vec_oprnds[0];
6789 }
6790 else
6791 {
bffb8014
RS
6792 op = vect_get_store_rhs (next_stmt_info);
6793 vec_oprnd = vect_get_vec_def_for_operand
6794 (op, next_stmt_info);
f502d50e
MM
6795 }
6796 }
f2e2a985 6797 else
f502d50e
MM
6798 {
6799 if (slp)
6800 vec_oprnd = vec_oprnds[j];
6801 else
c079cbac 6802 {
894dd753 6803 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411
RS
6804 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6805 vec_oprnd);
c079cbac 6806 }
f502d50e 6807 }
04199738
RB
6808 /* Pun the vector to extract from if necessary. */
6809 if (lvectype != vectype)
6810 {
6811 tree tem = make_ssa_name (lvectype);
6812 gimple *pun
6813 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6814 lvectype, vec_oprnd));
6815 vect_finish_stmt_generation (stmt, pun, gsi);
6816 vec_oprnd = tem;
6817 }
f502d50e
MM
6818 for (i = 0; i < nstores; i++)
6819 {
6820 tree newref, newoff;
355fe088 6821 gimple *incr, *assign;
f502d50e
MM
6822 tree size = TYPE_SIZE (ltype);
6823 /* Extract the i'th component. */
6824 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6825 bitsize_int (i), size);
6826 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6827 size, pos);
6828
6829 elem = force_gimple_operand_gsi (gsi, elem, true,
6830 NULL_TREE, true,
6831 GSI_SAME_STMT);
6832
b17dc4d4
RB
6833 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6834 group_el * elsz);
f502d50e 6835 newref = build2 (MEM_REF, ltype,
b17dc4d4 6836 running_off, this_off);
19986382 6837 vect_copy_ref_info (newref, DR_REF (first_dr));
f502d50e
MM
6838
6839 /* And store it to *running_off. */
6840 assign = gimple_build_assign (newref, elem);
e1bd7296
RS
6841 stmt_vec_info assign_info
6842 = vect_finish_stmt_generation (stmt, assign, gsi);
f502d50e 6843
b17dc4d4
RB
6844 group_el += lnel;
6845 if (! slp
6846 || group_el == group_size)
6847 {
6848 newoff = copy_ssa_name (running_off, NULL);
6849 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6850 running_off, stride_step);
6851 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 6852
b17dc4d4
RB
6853 running_off = newoff;
6854 group_el = 0;
6855 }
225ce44b
RB
6856 if (g == group_size - 1
6857 && !slp)
f502d50e
MM
6858 {
6859 if (j == 0 && i == 0)
225ce44b 6860 STMT_VINFO_VEC_STMT (stmt_info)
e1bd7296 6861 = *vec_stmt = assign_info;
f502d50e 6862 else
e1bd7296
RS
6863 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6864 prev_stmt_info = assign_info;
f502d50e
MM
6865 }
6866 }
f2e2a985 6867 }
bffb8014 6868 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
b17dc4d4
RB
6869 if (slp)
6870 break;
f2e2a985 6871 }
778dd3b6
RB
6872
6873 vec_oprnds.release ();
f2e2a985
MM
6874 return true;
6875 }
6876
8c681247 6877 auto_vec<tree> dr_chain (group_size);
9771b263 6878 oprnds.create (group_size);
ebfd146a 6879
720f5239 6880 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6881 gcc_assert (alignment_support_scheme);
70088b95
RS
6882 vec_loop_masks *loop_masks
6883 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6884 ? &LOOP_VINFO_MASKS (loop_vinfo)
6885 : NULL);
272c6793 6886 /* Targets with store-lane instructions must not require explicit
c3a8f964
RS
6887 realignment. vect_supportable_dr_alignment always returns either
6888 dr_aligned or dr_unaligned_supported for masked operations. */
7cfb4d93
RS
6889 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6890 && !mask
70088b95 6891 && !loop_masks)
272c6793
RS
6892 || alignment_support_scheme == dr_aligned
6893 || alignment_support_scheme == dr_unaligned_supported);
6894
62da9e14
RS
6895 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6896 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
6897 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6898
f307441a
RS
6899 tree bump;
6900 tree vec_offset = NULL_TREE;
6901 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6902 {
6903 aggr_type = NULL_TREE;
6904 bump = NULL_TREE;
6905 }
6906 else if (memory_access_type == VMAT_GATHER_SCATTER)
6907 {
6908 aggr_type = elem_type;
6909 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6910 &bump, &vec_offset);
6911 }
272c6793 6912 else
f307441a
RS
6913 {
6914 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6915 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6916 else
6917 aggr_type = vectype;
6918 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6919 }
ebfd146a 6920
c3a8f964
RS
6921 if (mask)
6922 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6923
ebfd146a
IR
6924 /* In case the vectorization factor (VF) is bigger than the number
6925 of elements that we can fit in a vectype (nunits), we have to generate
6926 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 6927 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
6928 vect_get_vec_def_for_copy_stmt. */
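 /* For example (illustrative): with a vectorization factor of 8 and a
    4-element vectype, ncopies is 8/4 = 2, so the vector store below is
    emitted twice per scalar store, the copies being chained through
    STMT_VINFO_RELATED_STMT.  */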
6929
0d0293ac 6930 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
6931
6932 S1: &base + 2 = x2
6933 S2: &base = x0
6934 S3: &base + 1 = x1
6935 S4: &base + 3 = x3
6936
6937 We create vectorized stores starting from the base address (the access of
6938 the first stmt in the chain (S2 in the above example)), when the last
6939 store stmt of the chain (S4) is reached:
6940
6941 VS1: &base = vx2
6942 VS2: &base + vec_size*1 = vx0
6943 VS3: &base + vec_size*2 = vx1
6944 VS4: &base + vec_size*3 = vx3
6945
6946 Then permutation statements are generated:
6947
3fcc1b55
JJ
6948 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6949 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 6950 ...
b8698a0f 6951
ebfd146a
IR
6952 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6953 (the order of the data-refs in the output of vect_permute_store_chain
6954 corresponds to the order of scalar stmts in the interleaving chain - see
6955 the documentation of vect_permute_store_chain()).
6956
6957 In case of both multiple types and interleaving, above vector stores and
ff802fa1 6958 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 6959 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 6960 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
6961 */
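 /* Illustrative note (not part of the original sources): in the selectors
    of VS5 and VS6 above, indices 0..7 select elements of the first
    VEC_PERM_EXPR operand and 8..15 elements of the second, so
    {0, 8, 1, 9, 2, 10, 3, 11} interleaves the low halves of vx0 and vx3
    while the second selector interleaves their high halves.  */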
6962
6963 prev_stmt_info = NULL;
c3a8f964 6964 tree vec_mask = NULL_TREE;
ebfd146a
IR
6965 for (j = 0; j < ncopies; j++)
6966 {
e1bd7296 6967 stmt_vec_info new_stmt_info;
ebfd146a
IR
6968 if (j == 0)
6969 {
6970 if (slp)
6971 {
6972 /* Get vectorized arguments for SLP_NODE. */
d092494c 6973 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
306b0c92 6974 NULL, slp_node);
ebfd146a 6975
9771b263 6976 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
6977 }
6978 else
6979 {
b8698a0f
L
6980 /* For interleaved stores we collect vectorized defs for all the
6981 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6982 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
6983 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6984
2c53b149 6985 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 6986 OPRNDS are of size 1. */
bffb8014 6987 stmt_vec_info next_stmt_info = first_stmt_info;
ebfd146a
IR
6988 for (i = 0; i < group_size; i++)
6989 {
b8698a0f 6990 /* Since gaps are not supported for interleaved stores,
2c53b149 6991 DR_GROUP_SIZE is the exact number of stmts in the chain.
bffb8014
RS
6992 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
6993 that there is no interleaving, DR_GROUP_SIZE is 1,
6994 and only one iteration of the loop will be executed. */
6995 op = vect_get_store_rhs (next_stmt_info);
6996 vec_oprnd = vect_get_vec_def_for_operand
6997 (op, next_stmt_info);
9771b263
DN
6998 dr_chain.quick_push (vec_oprnd);
6999 oprnds.quick_push (vec_oprnd);
bffb8014 7000 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
ebfd146a 7001 }
c3a8f964
RS
7002 if (mask)
7003 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7004 mask_vectype);
ebfd146a
IR
7005 }
7006
7007 /* We should have caught mismatched types earlier. */
7008 gcc_assert (useless_type_conversion_p (vectype,
7009 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
7010 bool simd_lane_access_p
7011 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7012 if (simd_lane_access_p
7013 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7014 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7015 && integer_zerop (DR_OFFSET (first_dr))
7016 && integer_zerop (DR_INIT (first_dr))
7017 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7018 get_alias_set (TREE_TYPE (ref_type))))
74bf76ed
JJ
7019 {
7020 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7021 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7022 inv_p = false;
74bf76ed 7023 }
f307441a
RS
7024 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7025 {
7026 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
7027 &dataref_ptr, &vec_offset);
7028 inv_p = false;
7029 }
74bf76ed
JJ
7030 else
7031 dataref_ptr
bffb8014 7032 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
74bf76ed 7033 simd_lane_access_p ? loop : NULL,
09dfa495 7034 offset, &dummy, gsi, &ptr_incr,
f307441a
RS
7035 simd_lane_access_p, &inv_p,
7036 NULL_TREE, bump);
a70d6342 7037 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 7038 }
b8698a0f 7039 else
ebfd146a 7040 {
b8698a0f
L
7041 /* For interleaved stores we created vectorized defs for all the
7042 defs stored in OPRNDS in the previous iteration (previous copy).
7043 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
7044 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7045 next copy.
2c53b149 7046 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
7047 OPRNDS are of size 1. */
7048 for (i = 0; i < group_size; i++)
7049 {
9771b263 7050 op = oprnds[i];
894dd753 7051 vect_is_simple_use (op, vinfo, &rhs_dt);
929b4411 7052 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
9771b263
DN
7053 dr_chain[i] = vec_oprnd;
7054 oprnds[i] = vec_oprnd;
ebfd146a 7055 }
c3a8f964 7056 if (mask)
929b4411 7057 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
74bf76ed
JJ
7058 if (dataref_offset)
7059 dataref_offset
f307441a
RS
7060 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7061 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
7062 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7063 vec_offset);
74bf76ed
JJ
7064 else
7065 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
f307441a 7066 bump);
ebfd146a
IR
7067 }
7068
2de001ee 7069 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 7070 {
272c6793 7071 tree vec_array;
267d3070 7072
3ba4ff41 7073 /* Get an array into which we can store the individual vectors. */
272c6793 7074 vec_array = create_vector_array (vectype, vec_num);
3ba4ff41
RS
7075
7076 /* Invalidate the current contents of VEC_ARRAY. This should
7077 become an RTL clobber too, which prevents the vector registers
7078 from being upward-exposed. */
7079 vect_clobber_variable (stmt, gsi, vec_array);
7080
7081 /* Store the individual vectors into the array. */
272c6793 7082 for (i = 0; i < vec_num; i++)
c2d7ab2a 7083 {
9771b263 7084 vec_oprnd = dr_chain[i];
272c6793 7085 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 7086 }
b8698a0f 7087
7cfb4d93 7088 tree final_mask = NULL;
70088b95
RS
7089 if (loop_masks)
7090 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7091 vectype, j);
7cfb4d93
RS
7092 if (vec_mask)
7093 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7094 vec_mask, gsi);
7095
7e11fc7f 7096 gcall *call;
7cfb4d93 7097 if (final_mask)
7e11fc7f
RS
7098 {
7099 /* Emit:
7100 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7101 VEC_ARRAY). */
7102 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7103 tree alias_ptr = build_int_cst (ref_type, align);
7104 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7105 dataref_ptr, alias_ptr,
7cfb4d93 7106 final_mask, vec_array);
7e11fc7f
RS
7107 }
7108 else
7109 {
7110 /* Emit:
7111 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7112 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7113 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7114 vec_array);
7115 gimple_call_set_lhs (call, data_ref);
7116 }
a844293d 7117 gimple_call_set_nothrow (call, true);
e1bd7296 7118 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
3ba4ff41
RS
7119
7120 /* Record that VEC_ARRAY is now dead. */
7121 vect_clobber_variable (stmt, gsi, vec_array);
272c6793
RS
7122 }
7123 else
7124 {
e1bd7296 7125 new_stmt_info = NULL;
0d0293ac 7126 if (grouped_store)
272c6793 7127 {
b6b9227d
JJ
7128 if (j == 0)
7129 result_chain.create (group_size);
272c6793
RS
7130 /* Permute. */
7131 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7132 &result_chain);
7133 }
c2d7ab2a 7134
bffb8014 7135 stmt_vec_info next_stmt_info = first_stmt_info;
272c6793
RS
7136 for (i = 0; i < vec_num; i++)
7137 {
644ffefd 7138 unsigned align, misalign;
272c6793 7139
7cfb4d93 7140 tree final_mask = NULL_TREE;
70088b95
RS
7141 if (loop_masks)
7142 final_mask = vect_get_loop_mask (gsi, loop_masks,
7143 vec_num * ncopies,
7cfb4d93
RS
7144 vectype, vec_num * j + i);
7145 if (vec_mask)
7146 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7147 vec_mask, gsi);
7148
f307441a
RS
7149 if (memory_access_type == VMAT_GATHER_SCATTER)
7150 {
7151 tree scale = size_int (gs_info.scale);
7152 gcall *call;
70088b95 7153 if (loop_masks)
f307441a
RS
7154 call = gimple_build_call_internal
7155 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7156 scale, vec_oprnd, final_mask);
7157 else
7158 call = gimple_build_call_internal
7159 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7160 scale, vec_oprnd);
7161 gimple_call_set_nothrow (call, true);
e1bd7296
RS
7162 new_stmt_info
7163 = vect_finish_stmt_generation (stmt, call, gsi);
f307441a
RS
7164 break;
7165 }
7166
272c6793
RS
7167 if (i > 0)
7168 /* Bump the vector pointer. */
7169 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
f307441a 7170 stmt, bump);
272c6793
RS
7171
7172 if (slp)
9771b263 7173 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
7174 else if (grouped_store)
7175 /* For grouped stores vectorized defs are interleaved in
272c6793 7176 vect_permute_store_chain(). */
9771b263 7177 vec_oprnd = result_chain[i];
272c6793 7178
f702e7d4 7179 align = DR_TARGET_ALIGNMENT (first_dr);
272c6793 7180 if (aligned_access_p (first_dr))
644ffefd 7181 misalign = 0;
272c6793
RS
7182 else if (DR_MISALIGNMENT (first_dr) == -1)
7183 {
25f68d90 7184 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 7185 misalign = 0;
272c6793
RS
7186 }
7187 else
c3a8f964 7188 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
7189 if (dataref_offset == NULL_TREE
7190 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7191 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7192 misalign);
c2d7ab2a 7193
62da9e14 7194 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
09dfa495
BM
7195 {
7196 tree perm_mask = perm_mask_for_reverse (vectype);
7197 tree perm_dest
c3a8f964 7198 = vect_create_destination_var (vect_get_store_rhs (stmt),
09dfa495 7199 vectype);
b731b390 7200 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
7201
7202 /* Generate the permute statement. */
355fe088 7203 gimple *perm_stmt
0d0e4a03
JJ
7204 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7205 vec_oprnd, perm_mask);
09dfa495
BM
7206 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7207
7208 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7209 vec_oprnd = new_temp;
7210 }
7211
272c6793 7212 /* Arguments are ready. Create the new vector stmt. */
7cfb4d93 7213 if (final_mask)
c3a8f964
RS
7214 {
7215 align = least_bit_hwi (misalign | align);
7216 tree ptr = build_int_cst (ref_type, align);
7217 gcall *call
7218 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7219 dataref_ptr, ptr,
7cfb4d93 7220 final_mask, vec_oprnd);
c3a8f964 7221 gimple_call_set_nothrow (call, true);
e1bd7296
RS
7222 new_stmt_info
7223 = vect_finish_stmt_generation (stmt, call, gsi);
c3a8f964
RS
7224 }
7225 else
7226 {
7227 data_ref = fold_build2 (MEM_REF, vectype,
7228 dataref_ptr,
7229 dataref_offset
7230 ? dataref_offset
7231 : build_int_cst (ref_type, 0));
7232 if (aligned_access_p (first_dr))
7233 ;
7234 else if (DR_MISALIGNMENT (first_dr) == -1)
7235 TREE_TYPE (data_ref)
7236 = build_aligned_type (TREE_TYPE (data_ref),
7237 align * BITS_PER_UNIT);
7238 else
7239 TREE_TYPE (data_ref)
7240 = build_aligned_type (TREE_TYPE (data_ref),
7241 TYPE_ALIGN (elem_type));
19986382 7242 vect_copy_ref_info (data_ref, DR_REF (first_dr));
e1bd7296
RS
7243 gassign *new_stmt
7244 = gimple_build_assign (data_ref, vec_oprnd);
7245 new_stmt_info
7246 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
c3a8f964 7247 }
272c6793
RS
7248
7249 if (slp)
7250 continue;
7251
bffb8014
RS
7252 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7253 if (!next_stmt_info)
272c6793
RS
7254 break;
7255 }
ebfd146a 7256 }
1da0876c
RS
7257 if (!slp)
7258 {
7259 if (j == 0)
e1bd7296 7260 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
1da0876c 7261 else
e1bd7296
RS
7262 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7263 prev_stmt_info = new_stmt_info;
1da0876c 7264 }
ebfd146a
IR
7265 }
7266
9771b263
DN
7267 oprnds.release ();
7268 result_chain.release ();
7269 vec_oprnds.release ();
ebfd146a
IR
7270
7271 return true;
7272}
7273
557be5a8
AL
7274/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7275 VECTOR_CST mask. No checks are made that the target platform supports the
7ac7e286 7276 mask, so callers may wish to test can_vec_perm_const_p separately, or use
557be5a8 7277 vect_gen_perm_mask_checked. */
a1e53f3f 7278
3fcc1b55 7279tree
4aae3cb3 7280vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
a1e53f3f 7281{
b00cb3bf 7282 tree mask_type;
a1e53f3f 7283
0ecc2b7d
RS
7284 poly_uint64 nunits = sel.length ();
7285 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
b00cb3bf
RS
7286
7287 mask_type = build_vector_type (ssizetype, nunits);
736d0f28 7288 return vec_perm_indices_to_tree (mask_type, sel);
a1e53f3f
L
7289}
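/* Usage sketch (illustrative, not part of the original sources): to build
   the element-reversing selector for a 4-element VECTYPE one would push
   3, 2, 1, 0 into a vec_perm_builder, wrap it in vec_perm_indices and pass
   it here; the returned VECTOR_CST can then be used as the third operand
   of a VEC_PERM_EXPR.  */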
7290
7ac7e286 7291/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
cf7aa6a3 7292 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
7293
7294tree
4aae3cb3 7295vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
557be5a8 7296{
7ac7e286 7297 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
557be5a8
AL
7298 return vect_gen_perm_mask_any (vectype, sel);
7299}
7300
aec7ae7d
JJ
7301/* Given vector variables X and Y that were generated for the scalar
7302 STMT, generate instructions to permute the vector elements of X and Y
7303 using the permutation mask MASK_VEC, insert them at *GSI and return the
7304 permuted vector variable. */
a1e53f3f
L
7305
7306static tree
355fe088 7307permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 7308 gimple_stmt_iterator *gsi)
a1e53f3f
L
7309{
7310 tree vectype = TREE_TYPE (x);
aec7ae7d 7311 tree perm_dest, data_ref;
355fe088 7312 gimple *perm_stmt;
a1e53f3f 7313
7ad429a4
RS
7314 tree scalar_dest = gimple_get_lhs (stmt);
7315 if (TREE_CODE (scalar_dest) == SSA_NAME)
7316 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7317 else
7318 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
b731b390 7319 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
7320
7321 /* Generate the permute statement. */
0d0e4a03 7322 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
7323 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7324
7325 return data_ref;
7326}
7327
6b916b36
RB
7328/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7329 inserting them on the loop's preheader edge.  Returns true if we
7330 were successful in doing so (and thus STMT can then be moved),
7331 otherwise returns false. */
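/* Illustrative example (not part of the original sources): if STMT uses
   addr_5 and LOOP contains "addr_5 = &a[n_3];" where n_3 is defined outside
   the loop, that definition can be moved to the preheader; if the feeding
   statement is a PHI or itself depends on another statement inside the
   loop, we conservatively return false.  */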
7332
7333static bool
355fe088 7334hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
7335{
7336 ssa_op_iter i;
7337 tree op;
7338 bool any = false;
7339
7340 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7341 {
355fe088 7342 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7343 if (!gimple_nop_p (def_stmt)
7344 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7345 {
7346 /* Make sure we don't need to recurse. While we could do
7347 so in simple cases when there are more complex use webs
7348 we don't have an easy way to preserve stmt order to fulfil
7349 dependencies within them. */
7350 tree op2;
7351 ssa_op_iter i2;
d1417442
JJ
7352 if (gimple_code (def_stmt) == GIMPLE_PHI)
7353 return false;
6b916b36
RB
7354 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7355 {
355fe088 7356 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
7357 if (!gimple_nop_p (def_stmt2)
7358 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7359 return false;
7360 }
7361 any = true;
7362 }
7363 }
7364
7365 if (!any)
7366 return true;
7367
7368 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7369 {
355fe088 7370 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
7371 if (!gimple_nop_p (def_stmt)
7372 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7373 {
7374 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7375 gsi_remove (&gsi, false);
7376 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7377 }
7378 }
7379
7380 return true;
7381}
7382
ebfd146a
IR
7383/* vectorizable_load.
7384
b8698a0f
L
7385 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7386 can be vectorized.
7387 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
7388 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7389 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7390
7391static bool
1eede195
RS
7392vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi,
7393 stmt_vec_info *vec_stmt, slp_tree slp_node,
7394 slp_instance slp_node_instance,
68435eb2 7395 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
7396{
7397 tree scalar_dest;
7398 tree vec_dest = NULL;
7399 tree data_ref = NULL;
7400 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 7401 stmt_vec_info prev_stmt_info;
ebfd146a 7402 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 7403 struct loop *loop = NULL;
ebfd146a 7404 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 7405 bool nested_in_vect_loop = false;
c716e67f 7406 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 7407 tree elem_type;
ebfd146a 7408 tree new_temp;
ef4bddc2 7409 machine_mode mode;
ebfd146a
IR
7410 tree dummy;
7411 enum dr_alignment_support alignment_support_scheme;
7412 tree dataref_ptr = NULL_TREE;
74bf76ed 7413 tree dataref_offset = NULL_TREE;
355fe088 7414 gimple *ptr_incr = NULL;
ebfd146a 7415 int ncopies;
4d694b27
RS
7416 int i, j;
7417 unsigned int group_size;
7418 poly_uint64 group_gap_adj;
ebfd146a
IR
7419 tree msq = NULL_TREE, lsq;
7420 tree offset = NULL_TREE;
356bbc4c 7421 tree byte_offset = NULL_TREE;
ebfd146a 7422 tree realignment_token = NULL_TREE;
538dd0b7 7423 gphi *phi = NULL;
6e1aa848 7424 vec<tree> dr_chain = vNULL;
0d0293ac 7425 bool grouped_load = false;
bffb8014 7426 stmt_vec_info first_stmt_info;
b9787581 7427 stmt_vec_info first_stmt_info_for_drptr = NULL;
ebfd146a
IR
7428 bool inv_p;
7429 bool compute_in_loop = false;
7430 struct loop *at_loop;
7431 int vec_num;
7432 bool slp = (slp_node != NULL);
7433 bool slp_perm = false;
a70d6342 7434 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
d9f21f6a 7435 poly_uint64 vf;
272c6793 7436 tree aggr_type;
134c85ca 7437 gather_scatter_info gs_info;
310213d4 7438 vec_info *vinfo = stmt_info->vinfo;
44fc7854 7439 tree ref_type;
929b4411 7440 enum vect_def_type mask_dt = vect_unknown_def_type;
a70d6342 7441
465c8c19
JJ
7442 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7443 return false;
7444
66c16fd9
RB
7445 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7446 && ! vec_stmt)
465c8c19
JJ
7447 return false;
7448
c3a8f964
RS
7449 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7450 if (is_gimple_assign (stmt))
7451 {
7452 scalar_dest = gimple_assign_lhs (stmt);
7453 if (TREE_CODE (scalar_dest) != SSA_NAME)
7454 return false;
465c8c19 7455
c3a8f964
RS
7456 tree_code code = gimple_assign_rhs_code (stmt);
7457 if (code != ARRAY_REF
7458 && code != BIT_FIELD_REF
7459 && code != INDIRECT_REF
7460 && code != COMPONENT_REF
7461 && code != IMAGPART_EXPR
7462 && code != REALPART_EXPR
7463 && code != MEM_REF
7464 && TREE_CODE_CLASS (code) != tcc_declaration)
7465 return false;
7466 }
7467 else
7468 {
7469 gcall *call = dyn_cast <gcall *> (stmt);
bfaa08b7
RS
7470 if (!call || !gimple_call_internal_p (call))
7471 return false;
7472
7473 internal_fn ifn = gimple_call_internal_fn (call);
7474 if (!internal_load_fn_p (ifn))
c3a8f964 7475 return false;
465c8c19 7476
c3a8f964
RS
7477 scalar_dest = gimple_call_lhs (call);
7478 if (!scalar_dest)
7479 return false;
7480
7481 if (slp_node != NULL)
7482 {
7483 if (dump_enabled_p ())
7484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7485 "SLP of masked loads not supported.\n");
7486 return false;
7487 }
7488
bfaa08b7
RS
7489 int mask_index = internal_fn_mask_index (ifn);
7490 if (mask_index >= 0)
7491 {
7492 mask = gimple_call_arg (call, mask_index);
929b4411
RS
7493 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7494 &mask_vectype))
bfaa08b7
RS
7495 return false;
7496 }
c3a8f964 7497 }
465c8c19
JJ
7498
7499 if (!STMT_VINFO_DATA_REF (stmt_info))
7500 return false;
7501
7502 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4d694b27 7503 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19 7504
a70d6342
IR
7505 if (loop_vinfo)
7506 {
7507 loop = LOOP_VINFO_LOOP (loop_vinfo);
7508 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7509 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7510 }
7511 else
3533e503 7512 vf = 1;
ebfd146a
IR
7513
7514 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 7515 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 7516 case of SLP. */
fce57248 7517 if (slp)
ebfd146a
IR
7518 ncopies = 1;
7519 else
e8f142e2 7520 ncopies = vect_get_num_copies (loop_vinfo, vectype);
ebfd146a
IR
7521
7522 gcc_assert (ncopies >= 1);
7523
7524 /* FORNOW. This restriction should be relaxed. */
7525 if (nested_in_vect_loop && ncopies > 1)
7526 {
73fbfcad 7527 if (dump_enabled_p ())
78c60e3d 7528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7529 "multiple types in nested loop.\n");
ebfd146a
IR
7530 return false;
7531 }
7532
f2556b68
RB
7533 /* Invalidate assumptions made by dependence analysis when vectorization
7534 on the unrolled body effectively re-orders stmts. */
7535 if (ncopies > 1
7536 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7537 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7538 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7539 {
7540 if (dump_enabled_p ())
7541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7542 "cannot perform implicit CSE when unrolling "
7543 "with negative dependence distance\n");
7544 return false;
7545 }
7546
7b7b1813 7547 elem_type = TREE_TYPE (vectype);
947131ba 7548 mode = TYPE_MODE (vectype);
ebfd146a
IR
7549
7550 /* FORNOW. In some cases we can vectorize even if the data type is not
7551 supported (e.g. data copies). */
947131ba 7552 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 7553 {
73fbfcad 7554 if (dump_enabled_p ())
78c60e3d 7555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7556 "Aligned load, but unsupported type.\n");
ebfd146a
IR
7557 return false;
7558 }
7559
ebfd146a 7560 /* Check if the load is a part of an interleaving chain. */
0d0293ac 7561 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 7562 {
0d0293ac 7563 grouped_load = true;
ebfd146a 7564 /* FORNOW */
2de001ee
RS
7565 gcc_assert (!nested_in_vect_loop);
7566 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 7567
bffb8014
RS
7568 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7569 group_size = DR_GROUP_SIZE (first_stmt_info);
d5f035ea 7570
b1af7da6
RB
7571 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7572 slp_perm = true;
7573
f2556b68
RB
7574 /* Invalidate assumptions made by dependence analysis when vectorization
7575 on the unrolled body effectively re-orders stmts. */
7576 if (!PURE_SLP_STMT (stmt_info)
7577 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
d9f21f6a
RS
7578 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7579 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
f2556b68
RB
7580 {
7581 if (dump_enabled_p ())
7582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7583 "cannot perform implicit CSE when performing "
7584 "group loads with negative dependence distance\n");
7585 return false;
7586 }
96bb56b2
RB
7587
7588 /* Similarly when the stmt is a load that is both part of a SLP
7589 instance and a loop vectorized stmt via the same-dr mechanism
7590 we have to give up. */
2c53b149 7591 if (DR_GROUP_SAME_DR_STMT (stmt_info)
96bb56b2 7592 && (STMT_SLP_TYPE (stmt_info)
c26228d4 7593 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
96bb56b2
RB
7594 {
7595 if (dump_enabled_p ())
7596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7597 "conflicting SLP types for CSEd load\n");
7598 return false;
7599 }
ebfd146a 7600 }
7cfb4d93
RS
7601 else
7602 group_size = 1;
ebfd146a 7603
2de001ee 7604 vect_memory_access_type memory_access_type;
7e11fc7f 7605 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
2de001ee
RS
7606 &memory_access_type, &gs_info))
7607 return false;
a1e53f3f 7608
c3a8f964
RS
7609 if (mask)
7610 {
7611 if (memory_access_type == VMAT_CONTIGUOUS)
7612 {
7e11fc7f
RS
7613 machine_mode vec_mode = TYPE_MODE (vectype);
7614 if (!VECTOR_MODE_P (vec_mode)
7615 || !can_vec_mask_load_store_p (vec_mode,
c3a8f964
RS
7616 TYPE_MODE (mask_vectype), true))
7617 return false;
7618 }
bfaa08b7 7619 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
c3a8f964
RS
7620 {
7621 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7622 tree masktype
7623 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7624 if (TREE_CODE (masktype) == INTEGER_TYPE)
7625 {
7626 if (dump_enabled_p ())
7627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7628 "masked gather with integer mask not"
7629 " supported.");
7630 return false;
7631 }
7632 }
bfaa08b7
RS
7633 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7634 && memory_access_type != VMAT_GATHER_SCATTER)
c3a8f964
RS
7635 {
7636 if (dump_enabled_p ())
7637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7638 "unsupported access type for masked load.\n");
7639 return false;
7640 }
7641 }
7642
ebfd146a
IR
7643 if (!vec_stmt) /* transformation not required. */
7644 {
2de001ee
RS
7645 if (!slp)
7646 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7cfb4d93
RS
7647
7648 if (loop_vinfo
7649 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7650 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
bfaa08b7 7651 memory_access_type, &gs_info);
7cfb4d93 7652
ebfd146a 7653 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
68435eb2
RB
7654 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7655 slp_node_instance, slp_node, cost_vec);
ebfd146a
IR
7656 return true;
7657 }
7658
2de001ee
RS
7659 if (!slp)
7660 gcc_assert (memory_access_type
7661 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7662
73fbfcad 7663 if (dump_enabled_p ())
78c60e3d 7664 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 7665 "transform load. ncopies = %d\n", ncopies);
ebfd146a 7666
67b8dbac 7667 /* Transform. */
ebfd146a 7668
f702e7d4 7669 ensure_base_align (dr);
c716e67f 7670
bfaa08b7 7671 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
aec7ae7d 7672 {
929b4411
RS
7673 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7674 mask_dt);
aec7ae7d
JJ
7675 return true;
7676 }
2de001ee
RS
7677
7678 if (memory_access_type == VMAT_ELEMENTWISE
7679 || memory_access_type == VMAT_STRIDED_SLP)
7d75abc8
MM
7680 {
7681 gimple_stmt_iterator incr_gsi;
7682 bool insert_after;
355fe088 7683 gimple *incr;
7d75abc8 7684 tree offvar;
7d75abc8
MM
7685 tree ivstep;
7686 tree running_off;
9771b263 7687 vec<constructor_elt, va_gc> *v = NULL;
14ac6aa2 7688 tree stride_base, stride_step, alias_off;
4d694b27
RS
7689 /* Checked by get_load_store_type. */
7690 unsigned int const_nunits = nunits.to_constant ();
b210f45f 7691 unsigned HOST_WIDE_INT cst_offset = 0;
14ac6aa2 7692
7cfb4d93 7693 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
14ac6aa2 7694 gcc_assert (!nested_in_vect_loop);
7d75abc8 7695
b210f45f 7696 if (grouped_load)
44fc7854 7697 {
bffb8014
RS
7698 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7699 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
44fc7854 7700 }
ab313a8c 7701 else
44fc7854 7702 {
bffb8014 7703 first_stmt_info = stmt_info;
44fc7854 7704 first_dr = dr;
b210f45f
RB
7705 }
7706 if (slp && grouped_load)
7707 {
bffb8014
RS
7708 group_size = DR_GROUP_SIZE (first_stmt_info);
7709 ref_type = get_group_alias_ptr_type (first_stmt_info);
b210f45f
RB
7710 }
7711 else
7712 {
7713 if (grouped_load)
7714 cst_offset
7715 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
bffb8014
RS
7716 * vect_get_place_in_interleaving_chain (stmt,
7717 first_stmt_info));
44fc7854 7718 group_size = 1;
b210f45f 7719 ref_type = reference_alias_ptr_type (DR_REF (dr));
44fc7854 7720 }
ab313a8c 7721
14ac6aa2
RB
7722 stride_base
7723 = fold_build_pointer_plus
ab313a8c 7724 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 7725 size_binop (PLUS_EXPR,
ab313a8c
RB
7726 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7727 convert_to_ptrofftype (DR_INIT (first_dr))));
7728 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7d75abc8
MM
7729
7730 /* For a load with loop-invariant (but other than power-of-2)
7731 stride (i.e. not a grouped access) like so:
7732
7733 for (i = 0; i < n; i += stride)
7734 ... = array[i];
7735
7736 we generate a new induction variable and new accesses to
7737 form a new vector (or vectors, depending on ncopies):
7738
7739 for (j = 0; ; j += VF*stride)
7740 tmp1 = array[j];
7741 tmp2 = array[j + stride];
7742 ...
7743 vectemp = {tmp1, tmp2, ...}
7744 */
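 /* Illustrative instance (a sketch, not part of the original sources):
    with a 4-element vector type and stride 3 the sequence above reads

    tmp1 = array[j];
    tmp2 = array[j + 3];
    tmp3 = array[j + 6];
    tmp4 = array[j + 9];
    vectemp = {tmp1, tmp2, tmp3, tmp4};

    i.e. NUNITS scalar loads combined into one vector by a CONSTRUCTOR,
    with the induction variable advanced by VF*stride per vector
    iteration.  */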
7745
ab313a8c
RB
7746 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7747 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7748
7749 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7750
b210f45f
RB
7751 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7752 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7753 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7754 loop, &incr_gsi, insert_after,
7755 &offvar, NULL);
7756 incr = gsi_stmt (incr_gsi);
4fbeb363 7757 loop_vinfo->add_stmt (incr);
7d75abc8 7758
b210f45f 7759 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7760
7761 prev_stmt_info = NULL;
7762 running_off = offvar;
44fc7854 7763 alias_off = build_int_cst (ref_type, 0);
4d694b27 7764 int nloads = const_nunits;
e09b4c37 7765 int lnel = 1;
7b5fc413 7766 tree ltype = TREE_TYPE (vectype);
ea60dd34 7767 tree lvectype = vectype;
b266b968 7768 auto_vec<tree> dr_chain;
2de001ee 7769 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7770 {
4d694b27 7771 if (group_size < const_nunits)
e09b4c37 7772 {
ff03930a
JJ
7773 /* First check if vec_init optab supports construction from
7774 vector elts directly. */
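 /* For illustration (hypothetical values): with vectype V8HI
 (const_nunits == 8) and group_size == 4, vmode is V4HImode; if the
 target can build a V8HI from two V4HI vectors via vec_init, we use
 nloads = 2 loads of ltype V4HI, each covering one whole group. */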
b397965c 7775 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7776 machine_mode vmode;
7777 if (mode_for_vector (elmode, group_size).exists (&vmode)
7778 && VECTOR_MODE_P (vmode)
414fef4e 7779 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7780 && (convert_optab_handler (vec_init_optab,
7781 TYPE_MODE (vectype), vmode)
7782 != CODE_FOR_nothing))
ea60dd34 7783 {
4d694b27 7784 nloads = const_nunits / group_size;
ea60dd34 7785 lnel = group_size;
ff03930a
JJ
7786 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7787 }
7788 else
7789 {
7790 /* Otherwise avoid emitting a constructor of vector elements
7791 by performing the loads using an integer type of the same
7792 size, constructing a vector of those and then
7793 re-interpreting it as the original vector type.
7794 This avoids a huge runtime penalty due to the general
7795 inability to perform store forwarding from smaller stores
7796 to a larger load. */
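 /* For illustration (hypothetical values): with vectype V8HI and
 group_size == 2, lsize is 32 bits, so each group of two HImode
 elements is loaded as one SImode value; four such loads build a
 V4SI, which is later VIEW_CONVERTed back to V8HI. */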
7797 unsigned lsize
7798 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7799 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7800 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7801 /* If we can't construct such a vector fall back to
7802 element loads of the original vector type. */
4d694b27 7803 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7804 && VECTOR_MODE_P (vmode)
414fef4e 7805 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7806 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7807 != CODE_FOR_nothing))
7808 {
4d694b27 7809 nloads = lnunits;
ff03930a
JJ
7810 lnel = group_size;
7811 ltype = build_nonstandard_integer_type (lsize, 1);
7812 lvectype = build_vector_type (ltype, nloads);
7813 }
ea60dd34 7814 }
e09b4c37 7815 }
2de001ee 7816 else
e09b4c37 7817 {
ea60dd34 7818 nloads = 1;
4d694b27 7819 lnel = const_nunits;
e09b4c37 7820 ltype = vectype;
e09b4c37 7821 }
2de001ee
RS
7822 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7823 }
bb4e4747
BC
7824 /* If the vectype is a single-element vector (vector(1) scalar_type), load it as that vector type directly. */
7825 else if (nloads == 1)
7826 ltype = vectype;
7827
2de001ee
RS
7828 if (slp)
7829 {
66c16fd9
RB
7830 /* For SLP permutation support we need to load the whole group,
7831 not only the number of vector stmts the permutation result
7832 fits in. */
b266b968 7833 if (slp_perm)
66c16fd9 7834 {
d9f21f6a
RS
7835 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7836 variable VF. */
7837 unsigned int const_vf = vf.to_constant ();
4d694b27 7838 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7839 dr_chain.create (ncopies);
7840 }
7841 else
7842 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7843 }
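 /* Worked example of the CEIL computation above (hypothetical values):
 for a permuted SLP load with group_size == 3, const_vf == 8 and
 const_nunits == 4, the whole group is loaded, so
 ncopies = CEIL (3 * 8, 4) = 6 vectors feed the permutation. */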
4d694b27 7844 unsigned int group_el = 0;
e09b4c37
RB
7845 unsigned HOST_WIDE_INT
7846 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7847 for (j = 0; j < ncopies; j++)
7848 {
7b5fc413 7849 if (nloads > 1)
e09b4c37 7850 vec_alloc (v, nloads);
e1bd7296 7851 stmt_vec_info new_stmt_info = NULL;
e09b4c37 7852 for (i = 0; i < nloads; i++)
7b5fc413 7853 {
e09b4c37 7854 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7855 group_el * elsz + cst_offset);
19986382
RB
7856 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7857 vect_copy_ref_info (data_ref, DR_REF (first_dr));
e1bd7296
RS
7858 gassign *new_stmt
7859 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7860 new_stmt_info
7861 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
e09b4c37
RB
7862 if (nloads > 1)
7863 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7864 gimple_assign_lhs (new_stmt));
7865
7866 group_el += lnel;
7867 if (! slp
7868 || group_el == group_size)
7b5fc413 7869 {
e09b4c37
RB
7870 tree newoff = copy_ssa_name (running_off);
7871 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7872 running_off, stride_step);
7b5fc413
RB
7873 vect_finish_stmt_generation (stmt, incr, gsi);
7874
7875 running_off = newoff;
e09b4c37 7876 group_el = 0;
7b5fc413 7877 }
7b5fc413 7878 }
e09b4c37 7879 if (nloads > 1)
7d75abc8 7880 {
ea60dd34
RB
7881 tree vec_inv = build_constructor (lvectype, v);
7882 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e1bd7296 7883 new_stmt_info = vinfo->lookup_def (new_temp);
ea60dd34
RB
7884 if (lvectype != vectype)
7885 {
e1bd7296
RS
7886 gassign *new_stmt
7887 = gimple_build_assign (make_ssa_name (vectype),
7888 VIEW_CONVERT_EXPR,
7889 build1 (VIEW_CONVERT_EXPR,
7890 vectype, new_temp));
7891 new_stmt_info
7892 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
ea60dd34 7893 }
7d75abc8
MM
7894 }
7895
7b5fc413 7896 if (slp)
b266b968 7897 {
b266b968 7898 if (slp_perm)
e1bd7296 7899 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
66c16fd9 7900 else
e1bd7296 7901 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
b266b968 7902 }
7d75abc8 7903 else
225ce44b
RB
7904 {
7905 if (j == 0)
e1bd7296 7906 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
225ce44b 7907 else
e1bd7296
RS
7908 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7909 prev_stmt_info = new_stmt_info;
225ce44b 7910 }
7d75abc8 7911 }
b266b968 7912 if (slp_perm)
29afecdf
RB
7913 {
7914 unsigned n_perms;
7915 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7916 slp_node_instance, false, &n_perms);
7917 }
7d75abc8
MM
7918 return true;
7919 }
aec7ae7d 7920
b5ec4de7
RS
7921 if (memory_access_type == VMAT_GATHER_SCATTER
7922 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7923 grouped_load = false;
7924
0d0293ac 7925 if (grouped_load)
ebfd146a 7926 {
bffb8014
RS
7927 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7928 group_size = DR_GROUP_SIZE (first_stmt_info);
4f0a0218 7929 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7930 without permutation. */
7931 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
bffb8014 7932 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4f0a0218
RB
7933 /* For BB vectorization always use the first stmt to base
7934 the data ref pointer on. */
7935 if (bb_vinfo)
b9787581 7936 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7937
ebfd146a 7938 /* Check if the chain of loads is already vectorized. */
bffb8014 7939 if (STMT_VINFO_VEC_STMT (first_stmt_info)
01d8bf07
RB
7940 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7941 ??? But we can only do so if there is exactly one
7942 as we have no way to get at the rest. Leave the CSE
7943 opportunity alone.
7944 ??? With the group load eventually participating
7945 in multiple different permutations (having multiple
7946 slp nodes which refer to the same group) the CSE
7947 is even wrong code. See PR56270. */
7948 && !slp)
ebfd146a
IR
7949 {
7950 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7951 return true;
7952 }
bffb8014 7953 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
9b999e8c 7954 group_gap_adj = 0;
ebfd146a
IR
7955
7956 /* VEC_NUM is the number of vect stmts to be created for this group. */
7957 if (slp)
7958 {
0d0293ac 7959 grouped_load = false;
91ff1504
RB
7960 /* For SLP permutation support we need to load the whole group,
7961 not only the number of vector stmts the permutation result
7962 fits in. */
7963 if (slp_perm)
b267968e 7964 {
d9f21f6a
RS
7965 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7966 variable VF. */
7967 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7968 unsigned int const_nunits = nunits.to_constant ();
7969 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7970 group_gap_adj = vf * group_size - nunits * vec_num;
7971 }
91ff1504 7972 else
b267968e
RB
7973 {
7974 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7975 group_gap_adj
7976 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7977 }
a70d6342 7978 }
ebfd146a 7979 else
9b999e8c 7980 vec_num = group_size;
44fc7854 7981
bffb8014 7982 ref_type = get_group_alias_ptr_type (first_stmt_info);
ebfd146a
IR
7983 }
7984 else
7985 {
bffb8014 7986 first_stmt_info = stmt_info;
ebfd146a
IR
7987 first_dr = dr;
7988 group_size = vec_num = 1;
9b999e8c 7989 group_gap_adj = 0;
44fc7854 7990 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7991 }
7992
720f5239 7993 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7994 gcc_assert (alignment_support_scheme);
70088b95
RS
7995 vec_loop_masks *loop_masks
7996 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7997 ? &LOOP_VINFO_MASKS (loop_vinfo)
7998 : NULL);
7cfb4d93
RS
7999 /* Targets with load-lanes instructions must not require explicit
8000 realignment. vect_supportable_dr_alignment always returns either
8001 dr_aligned or dr_unaligned_supported for masked operations. */
8002 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8003 && !mask
70088b95 8004 && !loop_masks)
272c6793
RS
8005 || alignment_support_scheme == dr_aligned
8006 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
8007
8008 /* In case the vectorization factor (VF) is bigger than the number
8009 of elements that we can fit in a vectype (nunits), we have to generate
8010 more than one vector stmt - i.e., we need to "unroll" the
ff802fa1 8011 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 8012 from one copy of the vector stmt to the next, in the field
ff802fa1 8013 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 8014 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
8015 stmts that use the defs of the current stmt. The example below
8016 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8017 need to create 4 vectorized stmts):
ebfd146a
IR
8018
8019 before vectorization:
8020 RELATED_STMT VEC_STMT
8021 S1: x = memref - -
8022 S2: z = x + 1 - -
8023
8024 step 1: vectorize stmt S1:
8025 We first create the vector stmt VS1_0, and, as usual, record a
8026 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8027 Next, we create the vector stmt VS1_1, and record a pointer to
8028 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 8029 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
8030 stmts and pointers:
8031 RELATED_STMT VEC_STMT
8032 VS1_0: vx0 = memref0 VS1_1 -
8033 VS1_1: vx1 = memref1 VS1_2 -
8034 VS1_2: vx2 = memref2 VS1_3 -
8035 VS1_3: vx3 = memref3 - -
8036 S1: x = load - VS1_0
8037 S2: z = x + 1 - -
8038
b8698a0f
L
8039 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8040 information we recorded in RELATED_STMT field is used to vectorize
ebfd146a
IR
8041 stmt S2. */
8042
0d0293ac 8043 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
8044
8045 S1: x2 = &base + 2
8046 S2: x0 = &base
8047 S3: x1 = &base + 1
8048 S4: x3 = &base + 3
8049
b8698a0f 8050 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
8051 starting from the access of the first stmt of the chain:
8052
8053 VS1: vx0 = &base
8054 VS2: vx1 = &base + vec_size*1
8055 VS3: vx3 = &base + vec_size*2
8056 VS4: vx4 = &base + vec_size*3
8057
8058 Then permutation statements are generated:
8059
e2c83630
RH
8060 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8061 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
8062 ...
8063
8064 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8065 (the order of the data-refs in the output of vect_permute_load_chain
8066 corresponds to the order of scalar stmts in the interleaving chain - see
8067 the documentation of vect_permute_load_chain()).
8068 The generation of permutation stmts and recording them in
0d0293ac 8069 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 8070
b8698a0f 8071 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
8072 permutation stmts above are created for every copy. The result vector
8073 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8074 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
ebfd146a
IR
8075
8076 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8077 on a target that supports unaligned accesses (dr_unaligned_supported)
8078 we generate the following code:
8079 p = initial_addr;
8080 indx = 0;
8081 loop {
8082 p = p + indx * vectype_size;
8083 vec_dest = *(p);
8084 indx = indx + 1;
8085 }
8086
8087 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 8088 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
8089 then generate the following code, in which the data in each iteration is
8090 obtained by two vector loads, one from the previous iteration, and one
8091 from the current iteration:
8092 p1 = initial_addr;
8093 msq_init = *(floor(p1))
8094 p2 = initial_addr + VS - 1;
8095 realignment_token = call target_builtin;
8096 indx = 0;
8097 loop {
8098 p2 = p2 + indx * vectype_size
8099 lsq = *(floor(p2))
8100 vec_dest = realign_load (msq, lsq, realignment_token)
8101 indx = indx + 1;
8102 msq = lsq;
8103 } */
8104
8105 /* If the misalignment remains the same throughout the execution of the
8106 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 8107 preheader. Otherwise, it needs to be created inside the loop.
ebfd146a
IR
8108 This can only occur when vectorizing memory accesses in the inner-loop
8109 nested within an outer-loop that is being vectorized. */
8110
d1e4b493 8111 if (nested_in_vect_loop
cf098191
RS
8112 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8113 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
8114 {
8115 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8116 compute_in_loop = true;
8117 }
8118
8119 if ((alignment_support_scheme == dr_explicit_realign_optimized
8120 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 8121 && !compute_in_loop)
ebfd146a 8122 {
bffb8014 8123 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
ebfd146a
IR
8124 alignment_support_scheme, NULL_TREE,
8125 &at_loop);
8126 if (alignment_support_scheme == dr_explicit_realign_optimized)
8127 {
538dd0b7 8128 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
8129 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8130 size_one_node);
ebfd146a
IR
8131 }
8132 }
8133 else
8134 at_loop = loop;
8135
62da9e14 8136 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
8137 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8138
ab2fc782
RS
8139 tree bump;
8140 tree vec_offset = NULL_TREE;
8141 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8142 {
8143 aggr_type = NULL_TREE;
8144 bump = NULL_TREE;
8145 }
8146 else if (memory_access_type == VMAT_GATHER_SCATTER)
8147 {
8148 aggr_type = elem_type;
8149 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8150 &bump, &vec_offset);
8151 }
272c6793 8152 else
ab2fc782
RS
8153 {
8154 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8155 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8156 else
8157 aggr_type = vectype;
8158 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8159 }
272c6793 8160
c3a8f964 8161 tree vec_mask = NULL_TREE;
ebfd146a 8162 prev_stmt_info = NULL;
4d694b27 8163 poly_uint64 group_elt = 0;
ebfd146a 8164 for (j = 0; j < ncopies; j++)
b8698a0f 8165 {
e1bd7296 8166 stmt_vec_info new_stmt_info = NULL;
272c6793 8167 /* 1. Create the vector or array pointer update chain. */
ebfd146a 8168 if (j == 0)
74bf76ed
JJ
8169 {
8170 bool simd_lane_access_p
8171 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8172 if (simd_lane_access_p
8173 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8174 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8175 && integer_zerop (DR_OFFSET (first_dr))
8176 && integer_zerop (DR_INIT (first_dr))
8177 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 8178 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
8179 && (alignment_support_scheme == dr_aligned
8180 || alignment_support_scheme == dr_unaligned_supported))
8181 {
8182 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 8183 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 8184 inv_p = false;
74bf76ed 8185 }
b9787581 8186 else if (first_stmt_info_for_drptr
bffb8014 8187 && first_stmt_info != first_stmt_info_for_drptr)
4f0a0218
RB
8188 {
8189 dataref_ptr
b9787581
RS
8190 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8191 aggr_type, at_loop, offset, &dummy,
8192 gsi, &ptr_incr, simd_lane_access_p,
ab2fc782 8193 &inv_p, byte_offset, bump);
4f0a0218
RB
8194 /* Adjust the pointer by the difference to first_stmt. */
8195 data_reference_p ptrdr
b9787581 8196 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
4f0a0218
RB
8197 tree diff = fold_convert (sizetype,
8198 size_binop (MINUS_EXPR,
8199 DR_INIT (first_dr),
8200 DR_INIT (ptrdr)));
8201 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8202 stmt, diff);
8203 }
bfaa08b7
RS
8204 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8205 {
8206 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8207 &dataref_ptr, &vec_offset);
8208 inv_p = false;
8209 }
74bf76ed
JJ
8210 else
8211 dataref_ptr
bffb8014 8212 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
74bf76ed 8213 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8214 simd_lane_access_p, &inv_p,
ab2fc782 8215 byte_offset, bump);
c3a8f964
RS
8216 if (mask)
8217 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8218 mask_vectype);
74bf76ed 8219 }
ebfd146a 8220 else
c3a8f964
RS
8221 {
8222 if (dataref_offset)
8223 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8224 bump);
bfaa08b7 8225 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8226 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8227 vec_offset);
c3a8f964 8228 else
ab2fc782
RS
8229 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8230 stmt, bump);
c3a8f964 8231 if (mask)
929b4411 8232 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8233 }
ebfd146a 8234
0d0293ac 8235 if (grouped_load || slp_perm)
9771b263 8236 dr_chain.create (vec_num);
5ce1ee7f 8237
2de001ee 8238 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8239 {
272c6793
RS
8240 tree vec_array;
8241
8242 vec_array = create_vector_array (vectype, vec_num);
8243
7cfb4d93 8244 tree final_mask = NULL_TREE;
70088b95
RS
8245 if (loop_masks)
8246 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8247 vectype, j);
7cfb4d93
RS
8248 if (vec_mask)
8249 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8250 vec_mask, gsi);
8251
7e11fc7f 8252 gcall *call;
7cfb4d93 8253 if (final_mask)
7e11fc7f
RS
8254 {
8255 /* Emit:
8256 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8257 VEC_MASK). */
8258 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8259 tree alias_ptr = build_int_cst (ref_type, align);
8260 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8261 dataref_ptr, alias_ptr,
7cfb4d93 8262 final_mask);
7e11fc7f
RS
8263 }
8264 else
8265 {
8266 /* Emit:
8267 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8268 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8269 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8270 }
a844293d
RS
8271 gimple_call_set_lhs (call, vec_array);
8272 gimple_call_set_nothrow (call, true);
e1bd7296 8273 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
ebfd146a 8274
272c6793
RS
8275 /* Extract each vector into an SSA_NAME. */
8276 for (i = 0; i < vec_num; i++)
ebfd146a 8277 {
272c6793
RS
8278 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8279 vec_array, i);
9771b263 8280 dr_chain.quick_push (new_temp);
272c6793
RS
8281 }
8282
8283 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8284 vect_record_grouped_load_vectors (stmt, dr_chain);
3ba4ff41
RS
8285
8286 /* Record that VEC_ARRAY is now dead. */
8287 vect_clobber_variable (stmt, gsi, vec_array);
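 /* For illustration (hypothetical values, a sketch of the lane
 semantics rather than code from the sources): for group_size == 2
 and V4SI vectors, the (MASK_)LOAD_LANES call above reads 8
 contiguous ints starting at DATAREF_PTR and de-interleaves them,
 so the two vectors extracted from VEC_ARRAY are roughly

 vec_array[0] = { p[0], p[2], p[4], p[6] };
 vec_array[1] = { p[1], p[3], p[5], p[7] };

 matching e.g. a two-register structure load such as AArch64 ld2. */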
272c6793
RS
8288 }
8289 else
8290 {
8291 for (i = 0; i < vec_num; i++)
8292 {
7cfb4d93 8293 tree final_mask = NULL_TREE;
70088b95 8294 if (loop_masks
7cfb4d93 8295 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8296 final_mask = vect_get_loop_mask (gsi, loop_masks,
8297 vec_num * ncopies,
7cfb4d93
RS
8298 vectype, vec_num * j + i);
8299 if (vec_mask)
8300 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8301 vec_mask, gsi);
8302
272c6793
RS
8303 if (i > 0)
8304 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8305 stmt, bump);
272c6793
RS
8306
8307 /* 2. Create the vector-load in the loop. */
e1bd7296 8308 gimple *new_stmt = NULL;
272c6793
RS
8309 switch (alignment_support_scheme)
8310 {
8311 case dr_aligned:
8312 case dr_unaligned_supported:
be1ac4ec 8313 {
644ffefd
MJ
8314 unsigned int align, misalign;
8315
bfaa08b7
RS
8316 if (memory_access_type == VMAT_GATHER_SCATTER)
8317 {
8318 tree scale = size_int (gs_info.scale);
8319 gcall *call;
70088b95 8320 if (loop_masks)
bfaa08b7
RS
8321 call = gimple_build_call_internal
8322 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8323 vec_offset, scale, final_mask);
8324 else
8325 call = gimple_build_call_internal
8326 (IFN_GATHER_LOAD, 3, dataref_ptr,
8327 vec_offset, scale);
8328 gimple_call_set_nothrow (call, true);
8329 new_stmt = call;
8330 data_ref = NULL_TREE;
8331 break;
8332 }
8333
f702e7d4 8334 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8335 if (alignment_support_scheme == dr_aligned)
8336 {
8337 gcc_assert (aligned_access_p (first_dr));
644ffefd 8338 misalign = 0;
272c6793
RS
8339 }
8340 else if (DR_MISALIGNMENT (first_dr) == -1)
8341 {
25f68d90 8342 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8343 misalign = 0;
272c6793
RS
8344 }
8345 else
c3a8f964 8346 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8347 if (dataref_offset == NULL_TREE
8348 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8349 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8350 align, misalign);
c3a8f964 8351
7cfb4d93 8352 if (final_mask)
c3a8f964
RS
8353 {
8354 align = least_bit_hwi (misalign | align);
8355 tree ptr = build_int_cst (ref_type, align);
8356 gcall *call
8357 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8358 dataref_ptr, ptr,
7cfb4d93 8359 final_mask);
c3a8f964
RS
8360 gimple_call_set_nothrow (call, true);
8361 new_stmt = call;
8362 data_ref = NULL_TREE;
8363 }
8364 else
8365 {
8366 data_ref
8367 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8368 dataref_offset
8369 ? dataref_offset
8370 : build_int_cst (ref_type, 0));
8371 if (alignment_support_scheme == dr_aligned)
8372 ;
8373 else if (DR_MISALIGNMENT (first_dr) == -1)
8374 TREE_TYPE (data_ref)
8375 = build_aligned_type (TREE_TYPE (data_ref),
8376 align * BITS_PER_UNIT);
8377 else
8378 TREE_TYPE (data_ref)
8379 = build_aligned_type (TREE_TYPE (data_ref),
8380 TYPE_ALIGN (elem_type));
8381 }
272c6793 8382 break;
be1ac4ec 8383 }
272c6793 8384 case dr_explicit_realign:
267d3070 8385 {
272c6793 8386 tree ptr, bump;
272c6793 8387
d88981fc 8388 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8389
8390 if (compute_in_loop)
bffb8014 8391 msq = vect_setup_realignment (first_stmt_info, gsi,
272c6793
RS
8392 &realignment_token,
8393 dr_explicit_realign,
8394 dataref_ptr, NULL);
8395
aed93b23
RB
8396 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8397 ptr = copy_ssa_name (dataref_ptr);
8398 else
8399 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8400 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8401 new_stmt = gimple_build_assign
8402 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8403 build_int_cst
8404 (TREE_TYPE (dataref_ptr),
f702e7d4 8405 -(HOST_WIDE_INT) align));
272c6793
RS
8406 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8407 data_ref
8408 = build2 (MEM_REF, vectype, ptr,
44fc7854 8409 build_int_cst (ref_type, 0));
19986382 8410 vect_copy_ref_info (data_ref, DR_REF (first_dr));
272c6793
RS
8411 vec_dest = vect_create_destination_var (scalar_dest,
8412 vectype);
8413 new_stmt = gimple_build_assign (vec_dest, data_ref);
8414 new_temp = make_ssa_name (vec_dest, new_stmt);
8415 gimple_assign_set_lhs (new_stmt, new_temp);
8416 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8417 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8418 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8419 msq = new_temp;
8420
d88981fc 8421 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8422 TYPE_SIZE_UNIT (elem_type));
d88981fc 8423 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8424 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8425 new_stmt = gimple_build_assign
8426 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8427 build_int_cst
f702e7d4 8428 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8429 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8430 gimple_assign_set_lhs (new_stmt, ptr);
8431 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8432 data_ref
8433 = build2 (MEM_REF, vectype, ptr,
44fc7854 8434 build_int_cst (ref_type, 0));
272c6793 8435 break;
267d3070 8436 }
272c6793 8437 case dr_explicit_realign_optimized:
f702e7d4
RS
8438 {
8439 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8440 new_temp = copy_ssa_name (dataref_ptr);
8441 else
8442 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8443 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8444 new_stmt = gimple_build_assign
8445 (new_temp, BIT_AND_EXPR, dataref_ptr,
8446 build_int_cst (TREE_TYPE (dataref_ptr),
8447 -(HOST_WIDE_INT) align));
8448 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8449 data_ref
8450 = build2 (MEM_REF, vectype, new_temp,
8451 build_int_cst (ref_type, 0));
8452 break;
8453 }
272c6793
RS
8454 default:
8455 gcc_unreachable ();
8456 }
ebfd146a 8457 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8458 /* DATA_REF is null if we've already built the statement. */
8459 if (data_ref)
19986382
RB
8460 {
8461 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8462 new_stmt = gimple_build_assign (vec_dest, data_ref);
8463 }
ebfd146a 8464 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8465 gimple_set_lhs (new_stmt, new_temp);
e1bd7296
RS
8466 new_stmt_info
8467 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8468
272c6793
RS
8469 /* 3. Handle explicit realignment if necessary/supported.
8470 Create in loop:
8471 vec_dest = realign_load (msq, lsq, realignment_token) */
8472 if (alignment_support_scheme == dr_explicit_realign_optimized
8473 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8474 {
272c6793
RS
8475 lsq = gimple_assign_lhs (new_stmt);
8476 if (!realignment_token)
8477 realignment_token = dataref_ptr;
8478 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8479 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8480 msq, lsq, realignment_token);
272c6793
RS
8481 new_temp = make_ssa_name (vec_dest, new_stmt);
8482 gimple_assign_set_lhs (new_stmt, new_temp);
e1bd7296
RS
8483 new_stmt_info
8484 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
8485
8486 if (alignment_support_scheme == dr_explicit_realign_optimized)
8487 {
8488 gcc_assert (phi);
8489 if (i == vec_num - 1 && j == ncopies - 1)
8490 add_phi_arg (phi, lsq,
8491 loop_latch_edge (containing_loop),
9e227d60 8492 UNKNOWN_LOCATION);
272c6793
RS
8493 msq = lsq;
8494 }
ebfd146a 8495 }
ebfd146a 8496
59fd17e3
RB
8497 /* 4. Handle invariant-load. */
8498 if (inv_p && !bb_vinfo)
8499 {
59fd17e3 8500 gcc_assert (!grouped_load);
d1417442
JJ
8501 /* If we have versioned for aliasing or the loop doesn't
8502 have any data dependencies that would preclude this,
8503 then we are sure this is a loop invariant load and
8504 thus we can insert it on the preheader edge. */
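 /* Sketch of the effect (not code from the sources): for an invariant
 load such as "x = *p" inside the loop, the scalar load is copied to
 the preheader as "tem = *p" and the vector value is built there by
 vect_init_vector (a splat of tem), so nothing is reloaded per
 iteration. */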
8505 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8506 && !nested_in_vect_loop
6b916b36 8507 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8508 {
8509 if (dump_enabled_p ())
8510 {
8511 dump_printf_loc (MSG_NOTE, vect_location,
8512 "hoisting out of the vectorized "
8513 "loop: ");
8514 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8515 }
b731b390 8516 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8517 gsi_insert_on_edge_immediate
8518 (loop_preheader_edge (loop),
8519 gimple_build_assign (tem,
8520 unshare_expr
8521 (gimple_assign_rhs1 (stmt))));
8522 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5 8523 new_stmt = SSA_NAME_DEF_STMT (new_temp);
e1bd7296 8524 new_stmt_info = vinfo->add_stmt (new_stmt);
a0e35eb0
RB
8525 }
8526 else
8527 {
8528 gimple_stmt_iterator gsi2 = *gsi;
8529 gsi_next (&gsi2);
8530 new_temp = vect_init_vector (stmt, scalar_dest,
8531 vectype, &gsi2);
e1bd7296 8532 new_stmt_info = vinfo->lookup_def (new_temp);
a0e35eb0 8533 }
59fd17e3
RB
8534 }
8535
62da9e14 8536 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8537 {
aec7ae7d
JJ
8538 tree perm_mask = perm_mask_for_reverse (vectype);
8539 new_temp = permute_vec_elements (new_temp, new_temp,
8540 perm_mask, stmt, gsi);
e1bd7296 8541 new_stmt_info = vinfo->lookup_def (new_temp);
ebfd146a 8542 }
267d3070 8543
272c6793 8544 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8545 vect_transform_grouped_load (). */
8546 if (grouped_load || slp_perm)
9771b263 8547 dr_chain.quick_push (new_temp);
267d3070 8548
272c6793
RS
8549 /* Store vector loads in the corresponding SLP_NODE. */
8550 if (slp && !slp_perm)
e1bd7296 8551 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
b267968e
RB
8552
8553 /* With SLP permutation we load the gaps as well; without it we
8554 need to skip the gaps once we have fully loaded all the
2c53b149 8555 elements. group_gap_adj is DR_GROUP_SIZE here. */
b267968e 8556 group_elt += nunits;
d9f21f6a
RS
8557 if (maybe_ne (group_gap_adj, 0U)
8558 && !slp_perm
8559 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8560 {
d9f21f6a
RS
8561 poly_wide_int bump_val
8562 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8563 * group_gap_adj);
8e6cdc90 8564 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8565 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8566 stmt, bump);
8567 group_elt = 0;
8568 }
272c6793 8569 }
9b999e8c
RB
8570 /* Bump the vector pointer to account for a gap or for excess
8571 elements loaded for a permuted SLP load. */
d9f21f6a 8572 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8573 {
d9f21f6a
RS
8574 poly_wide_int bump_val
8575 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8576 * group_gap_adj);
8e6cdc90 8577 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8578 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8579 stmt, bump);
8580 }
ebfd146a
IR
8581 }
8582
8583 if (slp && !slp_perm)
8584 continue;
8585
8586 if (slp_perm)
8587 {
29afecdf 8588 unsigned n_perms;
01d8bf07 8589 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8590 slp_node_instance, false,
8591 &n_perms))
ebfd146a 8592 {
9771b263 8593 dr_chain.release ();
ebfd146a
IR
8594 return false;
8595 }
8596 }
8597 else
8598 {
0d0293ac 8599 if (grouped_load)
ebfd146a 8600 {
2de001ee 8601 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8602 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8603 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8604 }
8605 else
8606 {
8607 if (j == 0)
e1bd7296 8608 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
ebfd146a 8609 else
e1bd7296
RS
8610 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8611 prev_stmt_info = new_stmt_info;
ebfd146a
IR
8612 }
8613 }
9771b263 8614 dr_chain.release ();
ebfd146a
IR
8615 }
8616
ebfd146a
IR
8617 return true;
8618}
8619
8620/* Function vect_is_simple_cond.
b8698a0f 8621
ebfd146a
IR
8622 Input:
8623 LOOP - the loop that is being vectorized.
8624 COND - Condition that is checked for simple use.
8625
e9e1d143
RG
8626 Output:
8627 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8628 *DTS - The def types for the arguments of the comparison
e9e1d143 8629
ebfd146a
IR
8630 Returns whether a COND can be vectorized. Checks whether
8631 condition operands are supportable using vect_is_simple_use. */
8632
87aab9b2 8633static bool
4fc5ebf1 8634vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8635 tree *comp_vectype, enum vect_def_type *dts,
8636 tree vectype)
ebfd146a
IR
8637{
8638 tree lhs, rhs;
e9e1d143 8639 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8640
a414c77f
IE
8641 /* Mask case. */
8642 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8643 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f 8644 {
894dd753 8645 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
a414c77f
IE
8646 || !*comp_vectype
8647 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8648 return false;
8649 return true;
8650 }
8651
ebfd146a
IR
8652 if (!COMPARISON_CLASS_P (cond))
8653 return false;
8654
8655 lhs = TREE_OPERAND (cond, 0);
8656 rhs = TREE_OPERAND (cond, 1);
8657
8658 if (TREE_CODE (lhs) == SSA_NAME)
8659 {
894dd753 8660 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
ebfd146a
IR
8661 return false;
8662 }
4fc5ebf1
JG
8663 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8664 || TREE_CODE (lhs) == FIXED_CST)
8665 dts[0] = vect_constant_def;
8666 else
ebfd146a
IR
8667 return false;
8668
8669 if (TREE_CODE (rhs) == SSA_NAME)
8670 {
894dd753 8671 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
ebfd146a
IR
8672 return false;
8673 }
4fc5ebf1
JG
8674 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8675 || TREE_CODE (rhs) == FIXED_CST)
8676 dts[1] = vect_constant_def;
8677 else
ebfd146a
IR
8678 return false;
8679
28b33016 8680 if (vectype1 && vectype2
928686b1
RS
8681 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8682 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8683 return false;
8684
e9e1d143 8685 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8 8686 /* Invariant comparison. */
4515e413 8687 if (! *comp_vectype && vectype)
8da4c8d8
RB
8688 {
8689 tree scalar_type = TREE_TYPE (lhs);
8690 /* If we can widen the comparison to match vectype do so. */
8691 if (INTEGRAL_TYPE_P (scalar_type)
8692 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8693 TYPE_SIZE (TREE_TYPE (vectype))))
8694 scalar_type = build_nonstandard_integer_type
8695 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8696 TYPE_UNSIGNED (scalar_type));
8697 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8698 }
8699
ebfd146a
IR
8700 return true;
8701}
8702
8703/* vectorizable_condition.
8704
b8698a0f
L
8705 Check if STMT is a conditional modify expression that can be vectorized.
8706 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8707 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8708 at GSI.
8709
8710 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8711 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 8712 else clause if it is 2).
ebfd146a
IR
8713
8714 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8715
4bbe8262 8716bool
355fe088 8717vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195
RS
8718 stmt_vec_info *vec_stmt, tree reduc_def,
8719 int reduc_index, slp_tree slp_node,
8720 stmt_vector_for_cost *cost_vec)
ebfd146a
IR
8721{
8722 tree scalar_dest = NULL_TREE;
8723 tree vec_dest = NULL_TREE;
01216d27
JJ
8724 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8725 tree then_clause, else_clause;
ebfd146a 8726 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8727 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8728 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8729 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8730 tree vec_compare;
ebfd146a
IR
8731 tree new_temp;
8732 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8733 enum vect_def_type dts[4]
8734 = {vect_unknown_def_type, vect_unknown_def_type,
8735 vect_unknown_def_type, vect_unknown_def_type};
8736 int ndts = 4;
f7e531cf 8737 int ncopies;
01216d27 8738 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8739 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8740 int i, j;
8741 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8742 vec<tree> vec_oprnds0 = vNULL;
8743 vec<tree> vec_oprnds1 = vNULL;
8744 vec<tree> vec_oprnds2 = vNULL;
8745 vec<tree> vec_oprnds3 = vNULL;
74946978 8746 tree vec_cmp_type;
a414c77f 8747 bool masked = false;
b8698a0f 8748
f7e531cf
IR
8749 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8750 return false;
8751
bb6c2b68
RS
8752 vect_reduction_type reduction_type
8753 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8754 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8755 {
8756 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8757 return false;
ebfd146a 8758
af29617a
AH
8759 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8760 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8761 && reduc_def))
8762 return false;
ebfd146a 8763
af29617a
AH
8764 /* FORNOW: not yet supported. */
8765 if (STMT_VINFO_LIVE_P (stmt_info))
8766 {
8767 if (dump_enabled_p ())
8768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8769 "value used after loop.\n");
8770 return false;
8771 }
ebfd146a
IR
8772 }
8773
8774 /* Is vectorizable conditional operation? */
8775 if (!is_gimple_assign (stmt))
8776 return false;
8777
8778 code = gimple_assign_rhs_code (stmt);
8779
8780 if (code != COND_EXPR)
8781 return false;
8782
465c8c19 8783 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8784 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8785
fce57248 8786 if (slp_node)
465c8c19
JJ
8787 ncopies = 1;
8788 else
e8f142e2 8789 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8790
8791 gcc_assert (ncopies >= 1);
8792 if (reduc_index && ncopies > 1)
8793 return false; /* FORNOW */
8794
4e71066d
RG
8795 cond_expr = gimple_assign_rhs1 (stmt);
8796 then_clause = gimple_assign_rhs2 (stmt);
8797 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8798
4fc5ebf1 8799 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
4515e413 8800 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
e9e1d143 8801 || !comp_vectype)
ebfd146a
IR
8802 return false;
8803
894dd753 8804 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
2947d3b2 8805 return false;
894dd753 8806 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
ebfd146a 8807 return false;
2947d3b2
IE
8808
8809 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8810 return false;
8811
8812 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8813 return false;
8814
28b33016
IE
8815 masked = !COMPARISON_CLASS_P (cond_expr);
8816 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8817
74946978
MP
8818 if (vec_cmp_type == NULL_TREE)
8819 return false;
784fb9b3 8820
01216d27
JJ
8821 cond_code = TREE_CODE (cond_expr);
8822 if (!masked)
8823 {
8824 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8825 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8826 }
8827
8828 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8829 {
8830 /* Boolean values may have another representation in vectors
8831 and therefore we prefer bit operations over comparison for
8832 them (which also works for scalar masks). We store opcodes
8833 to use in bitop1 and bitop2. Statement is vectorized as
8834 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8835 depending on bitop1 and bitop2 arity. */
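 For illustration (hypothetical boolean operands a and b): a > b is
 rewritten using bitop1/bitop2 as

 tmp = ~b; (bitop1 = BIT_NOT_EXPR applied to the rhs)
 mask = a & tmp; (bitop2 = BIT_AND_EXPR)
 lhs = VEC_COND_EXPR <mask, then_clause, else_clause>;

 which avoids a vector comparison on mask types. */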
8836 switch (cond_code)
8837 {
8838 case GT_EXPR:
8839 bitop1 = BIT_NOT_EXPR;
8840 bitop2 = BIT_AND_EXPR;
8841 break;
8842 case GE_EXPR:
8843 bitop1 = BIT_NOT_EXPR;
8844 bitop2 = BIT_IOR_EXPR;
8845 break;
8846 case LT_EXPR:
8847 bitop1 = BIT_NOT_EXPR;
8848 bitop2 = BIT_AND_EXPR;
8849 std::swap (cond_expr0, cond_expr1);
8850 break;
8851 case LE_EXPR:
8852 bitop1 = BIT_NOT_EXPR;
8853 bitop2 = BIT_IOR_EXPR;
8854 std::swap (cond_expr0, cond_expr1);
8855 break;
8856 case NE_EXPR:
8857 bitop1 = BIT_XOR_EXPR;
8858 break;
8859 case EQ_EXPR:
8860 bitop1 = BIT_XOR_EXPR;
8861 bitop2 = BIT_NOT_EXPR;
8862 break;
8863 default:
8864 return false;
8865 }
8866 cond_code = SSA_NAME;
8867 }
8868
b8698a0f 8869 if (!vec_stmt)
ebfd146a 8870 {
01216d27
JJ
8871 if (bitop1 != NOP_EXPR)
8872 {
8873 machine_mode mode = TYPE_MODE (comp_vectype);
8874 optab optab;
8875
8876 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8877 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8878 return false;
8879
8880 if (bitop2 != NOP_EXPR)
8881 {
8882 optab = optab_for_tree_code (bitop2, comp_vectype,
8883 optab_default);
8884 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8885 return false;
8886 }
8887 }
4fc5ebf1
JG
8888 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8889 cond_code))
8890 {
68435eb2
RB
8891 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8892 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8893 cost_vec);
4fc5ebf1
JG
8894 return true;
8895 }
8896 return false;
ebfd146a
IR
8897 }
8898
f7e531cf
IR
8899 /* Transform. */
8900
8901 if (!slp_node)
8902 {
9771b263
DN
8903 vec_oprnds0.create (1);
8904 vec_oprnds1.create (1);
8905 vec_oprnds2.create (1);
8906 vec_oprnds3.create (1);
f7e531cf 8907 }
ebfd146a
IR
8908
8909 /* Handle def. */
8910 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8911 if (reduction_type != EXTRACT_LAST_REDUCTION)
8912 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8913
8914 /* Handle cond expr. */
a855b1b1
MM
8915 for (j = 0; j < ncopies; j++)
8916 {
e1bd7296 8917 stmt_vec_info new_stmt_info = NULL;
a855b1b1
MM
8918 if (j == 0)
8919 {
f7e531cf
IR
8920 if (slp_node)
8921 {
00f96dc9
TS
8922 auto_vec<tree, 4> ops;
8923 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8924
a414c77f 8925 if (masked)
01216d27 8926 ops.safe_push (cond_expr);
a414c77f
IE
8927 else
8928 {
01216d27
JJ
8929 ops.safe_push (cond_expr0);
8930 ops.safe_push (cond_expr1);
a414c77f 8931 }
9771b263
DN
8932 ops.safe_push (then_clause);
8933 ops.safe_push (else_clause);
306b0c92 8934 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8935 vec_oprnds3 = vec_defs.pop ();
8936 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8937 if (!masked)
8938 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8939 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8940 }
8941 else
8942 {
a414c77f
IE
8943 if (masked)
8944 {
8945 vec_cond_lhs
8946 = vect_get_vec_def_for_operand (cond_expr, stmt,
8947 comp_vectype);
894dd753 8948 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
a414c77f
IE
8949 }
8950 else
8951 {
01216d27
JJ
8952 vec_cond_lhs
8953 = vect_get_vec_def_for_operand (cond_expr0,
8954 stmt, comp_vectype);
894dd753 8955 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
01216d27
JJ
8956
8957 vec_cond_rhs
8958 = vect_get_vec_def_for_operand (cond_expr1,
8959 stmt, comp_vectype);
894dd753 8960 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
a414c77f 8961 }
f7e531cf
IR
8962 if (reduc_index == 1)
8963 vec_then_clause = reduc_def;
8964 else
8965 {
8966 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241 8967 stmt);
894dd753 8968 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
f7e531cf
IR
8969 }
8970 if (reduc_index == 2)
8971 vec_else_clause = reduc_def;
8972 else
8973 {
8974 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241 8975 stmt);
894dd753 8976 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
f7e531cf 8977 }
a855b1b1
MM
8978 }
8979 }
8980 else
8981 {
a414c77f
IE
8982 vec_cond_lhs
8983 = vect_get_vec_def_for_stmt_copy (dts[0],
8984 vec_oprnds0.pop ());
8985 if (!masked)
8986 vec_cond_rhs
8987 = vect_get_vec_def_for_stmt_copy (dts[1],
8988 vec_oprnds1.pop ());
8989
a855b1b1 8990 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8991 vec_oprnds2.pop ());
a855b1b1 8992 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8993 vec_oprnds3.pop ());
f7e531cf
IR
8994 }
8995
8996 if (!slp_node)
8997 {
9771b263 8998 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8999 if (!masked)
9000 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
9001 vec_oprnds2.quick_push (vec_then_clause);
9002 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
9003 }
9004
9dc3f7de 9005 /* Arguments are ready. Create the new vector stmt. */
9771b263 9006 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 9007 {
9771b263
DN
9008 vec_then_clause = vec_oprnds2[i];
9009 vec_else_clause = vec_oprnds3[i];
a855b1b1 9010
a414c77f
IE
9011 if (masked)
9012 vec_compare = vec_cond_lhs;
9013 else
9014 {
9015 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
9016 if (bitop1 == NOP_EXPR)
9017 vec_compare = build2 (cond_code, vec_cmp_type,
9018 vec_cond_lhs, vec_cond_rhs);
9019 else
9020 {
9021 new_temp = make_ssa_name (vec_cmp_type);
e1bd7296 9022 gassign *new_stmt;
01216d27
JJ
9023 if (bitop1 == BIT_NOT_EXPR)
9024 new_stmt = gimple_build_assign (new_temp, bitop1,
9025 vec_cond_rhs);
9026 else
9027 new_stmt
9028 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9029 vec_cond_rhs);
9030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9031 if (bitop2 == NOP_EXPR)
9032 vec_compare = new_temp;
9033 else if (bitop2 == BIT_NOT_EXPR)
9034 {
9035 /* Instead of doing ~x ? y : z do x ? z : y. */
9036 vec_compare = new_temp;
9037 std::swap (vec_then_clause, vec_else_clause);
9038 }
9039 else
9040 {
9041 vec_compare = make_ssa_name (vec_cmp_type);
9042 new_stmt
9043 = gimple_build_assign (vec_compare, bitop2,
9044 vec_cond_lhs, new_temp);
9045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9046 }
9047 }
a414c77f 9048 }
bb6c2b68
RS
9049 if (reduction_type == EXTRACT_LAST_REDUCTION)
9050 {
9051 if (!is_gimple_val (vec_compare))
9052 {
9053 tree vec_compare_name = make_ssa_name (vec_cmp_type);
e1bd7296
RS
9054 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9055 vec_compare);
bb6c2b68
RS
9056 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9057 vec_compare = vec_compare_name;
9058 }
9059 gcc_assert (reduc_index == 2);
e1bd7296 9060 gcall *new_stmt = gimple_build_call_internal
bb6c2b68
RS
9061 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9062 vec_then_clause);
9063 gimple_call_set_lhs (new_stmt, scalar_dest);
9064 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9065 if (stmt == gsi_stmt (*gsi))
e1bd7296 9066 new_stmt_info = vect_finish_replace_stmt (stmt, new_stmt);
bb6c2b68
RS
9067 else
9068 {
9069 /* In this case we're moving the definition to later in the
9070 block. That doesn't matter because the only uses of the
9071 lhs are in phi statements. */
9072 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9073 gsi_remove (&old_gsi, true);
e1bd7296
RS
9074 new_stmt_info
9075 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
bb6c2b68
RS
9076 }
9077 }
9078 else
9079 {
9080 new_temp = make_ssa_name (vec_dest);
e1bd7296
RS
9081 gassign *new_stmt
9082 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9083 vec_then_clause, vec_else_clause);
9084 new_stmt_info
9085 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
bb6c2b68 9086 }
f7e531cf 9087 if (slp_node)
e1bd7296 9088 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
f7e531cf
IR
9089 }
9090
9091 if (slp_node)
9092 continue;
9093
e1bd7296
RS
9094 if (j == 0)
9095 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9096 else
9097 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
f7e531cf 9098
e1bd7296 9099 prev_stmt_info = new_stmt_info;
a855b1b1 9100 }
b8698a0f 9101
9771b263
DN
9102 vec_oprnds0.release ();
9103 vec_oprnds1.release ();
9104 vec_oprnds2.release ();
9105 vec_oprnds3.release ();
f7e531cf 9106
ebfd146a
IR
9107 return true;
9108}
9109
42fd8198
IE
9110/* vectorizable_comparison.
9111
9112 Check if STMT is a comparison expression that can be vectorized.
9113 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9114 comparison, put it in VEC_STMT, and insert it at GSI.
9115
9116 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9117
fce57248 9118static bool
42fd8198 9119vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 9120 stmt_vec_info *vec_stmt, tree reduc_def,
68435eb2 9121 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
42fd8198
IE
9122{
9123 tree lhs, rhs1, rhs2;
9124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9125 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9126 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9127 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9128 tree new_temp;
9129 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9130 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 9131 int ndts = 2;
928686b1 9132 poly_uint64 nunits;
42fd8198 9133 int ncopies;
49e76ff1 9134 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
9135 stmt_vec_info prev_stmt_info = NULL;
9136 int i, j;
9137 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9138 vec<tree> vec_oprnds0 = vNULL;
9139 vec<tree> vec_oprnds1 = vNULL;
42fd8198
IE
9140 tree mask_type;
9141 tree mask;
9142
c245362b
IE
9143 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9144 return false;
9145
30480bcd 9146 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
9147 return false;
9148
9149 mask_type = vectype;
9150 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9151
fce57248 9152 if (slp_node)
42fd8198
IE
9153 ncopies = 1;
9154 else
e8f142e2 9155 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
9156
9157 gcc_assert (ncopies >= 1);
42fd8198
IE
9158 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9159 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9160 && reduc_def))
9161 return false;
9162
9163 if (STMT_VINFO_LIVE_P (stmt_info))
9164 {
9165 if (dump_enabled_p ())
9166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9167 "value used after loop.\n");
9168 return false;
9169 }
9170
9171 if (!is_gimple_assign (stmt))
9172 return false;
9173
9174 code = gimple_assign_rhs_code (stmt);
9175
9176 if (TREE_CODE_CLASS (code) != tcc_comparison)
9177 return false;
9178
9179 rhs1 = gimple_assign_rhs1 (stmt);
9180 rhs2 = gimple_assign_rhs2 (stmt);
9181
894dd753 9182 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
42fd8198
IE
9183 return false;
9184
894dd753 9185 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
42fd8198
IE
9186 return false;
9187
9188 if (vectype1 && vectype2
928686b1
RS
9189 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9190 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
9191 return false;
9192
9193 vectype = vectype1 ? vectype1 : vectype2;
9194
9195 /* Invariant comparison. */
9196 if (!vectype)
9197 {
69a9a66f 9198 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9199 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9200 return false;
9201 }
928686b1 9202 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9203 return false;
9204
49e76ff1
IE
9205 /* Can't compare mask and non-mask types. */
9206 if (vectype1 && vectype2
9207 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9208 return false;
9209
9210 /* Boolean values may have another representation in vectors
9211 and therefore we prefer bit operations over comparison for
9212 them (which also works for scalar masks). We store opcodes
9213 to use in bitop1 and bitop2. Statement is vectorized as
9214 BITOP2 (rhs1 BITOP1 rhs2) or
9215 rhs1 BITOP2 (BITOP1 rhs2)
9216 depending on bitop1 and bitop2 arity. */
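 For illustration (hypothetical boolean operands a and b): a == b
 becomes

 tmp = a ^ b; (bitop1 = BIT_XOR_EXPR)
 mask = ~tmp; (bitop2 = BIT_NOT_EXPR)

 i.e. the result mask is set exactly where the two mask operands
 agree. */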
9217 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9218 {
9219 if (code == GT_EXPR)
9220 {
9221 bitop1 = BIT_NOT_EXPR;
9222 bitop2 = BIT_AND_EXPR;
9223 }
9224 else if (code == GE_EXPR)
9225 {
9226 bitop1 = BIT_NOT_EXPR;
9227 bitop2 = BIT_IOR_EXPR;
9228 }
9229 else if (code == LT_EXPR)
9230 {
9231 bitop1 = BIT_NOT_EXPR;
9232 bitop2 = BIT_AND_EXPR;
9233 std::swap (rhs1, rhs2);
264d951a 9234 std::swap (dts[0], dts[1]);
49e76ff1
IE
9235 }
9236 else if (code == LE_EXPR)
9237 {
9238 bitop1 = BIT_NOT_EXPR;
9239 bitop2 = BIT_IOR_EXPR;
9240 std::swap (rhs1, rhs2);
264d951a 9241 std::swap (dts[0], dts[1]);
49e76ff1
IE
9242 }
9243 else
9244 {
9245 bitop1 = BIT_XOR_EXPR;
9246 if (code == EQ_EXPR)
9247 bitop2 = BIT_NOT_EXPR;
9248 }
9249 }
9250
42fd8198
IE
9251 if (!vec_stmt)
9252 {
49e76ff1 9253 if (bitop1 == NOP_EXPR)
68435eb2
RB
9254 {
9255 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9256 return false;
9257 }
49e76ff1
IE
9258 else
9259 {
9260 machine_mode mode = TYPE_MODE (vectype);
9261 optab optab;
9262
9263 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9264 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9265 return false;
9266
9267 if (bitop2 != NOP_EXPR)
9268 {
9269 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9270 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9271 return false;
9272 }
49e76ff1 9273 }
68435eb2
RB
9274
9275 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9276 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9277 dts, ndts, slp_node, cost_vec);
9278 return true;
42fd8198
IE
9279 }
9280
9281 /* Transform. */
9282 if (!slp_node)
9283 {
9284 vec_oprnds0.create (1);
9285 vec_oprnds1.create (1);
9286 }
9287
9288 /* Handle def. */
9289 lhs = gimple_assign_lhs (stmt);
9290 mask = vect_create_destination_var (lhs, mask_type);
9291
9292 /* Handle cmp expr. */
9293 for (j = 0; j < ncopies; j++)
9294 {
e1bd7296 9295 stmt_vec_info new_stmt_info = NULL;
42fd8198
IE
9296 if (j == 0)
9297 {
9298 if (slp_node)
9299 {
9300 auto_vec<tree, 2> ops;
9301 auto_vec<vec<tree>, 2> vec_defs;
9302
9303 ops.safe_push (rhs1);
9304 ops.safe_push (rhs2);
306b0c92 9305 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9306 vec_oprnds1 = vec_defs.pop ();
9307 vec_oprnds0 = vec_defs.pop ();
9308 }
9309 else
9310 {
e4af0bc4
IE
9311 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9312 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9313 }
9314 }
9315 else
9316 {
9317 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9318 vec_oprnds0.pop ());
9319 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9320 vec_oprnds1.pop ());
9321 }
9322
9323 if (!slp_node)
9324 {
9325 vec_oprnds0.quick_push (vec_rhs1);
9326 vec_oprnds1.quick_push (vec_rhs2);
9327 }
9328
9329 /* Arguments are ready. Create the new vector stmt. */
9330 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9331 {
9332 vec_rhs2 = vec_oprnds1[i];
9333
9334 new_temp = make_ssa_name (mask);
49e76ff1
IE
9335 if (bitop1 == NOP_EXPR)
9336 {
e1bd7296
RS
9337 gassign *new_stmt = gimple_build_assign (new_temp, code,
9338 vec_rhs1, vec_rhs2);
9339 new_stmt_info
9340 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
49e76ff1
IE
9341 }
9342 else
9343 {
e1bd7296 9344 gassign *new_stmt;
49e76ff1
IE
9345 if (bitop1 == BIT_NOT_EXPR)
9346 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9347 else
9348 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9349 vec_rhs2);
e1bd7296
RS
9350 new_stmt_info
9351 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
49e76ff1
IE
9352 if (bitop2 != NOP_EXPR)
9353 {
9354 tree res = make_ssa_name (mask);
9355 if (bitop2 == BIT_NOT_EXPR)
9356 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9357 else
9358 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9359 new_temp);
e1bd7296
RS
9360 new_stmt_info
9361 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
49e76ff1
IE
9362 }
9363 }
42fd8198 9364 if (slp_node)
e1bd7296 9365 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
42fd8198
IE
9366 }
9367
9368 if (slp_node)
9369 continue;
9370
9371 if (j == 0)
e1bd7296 9372 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
42fd8198 9373 else
e1bd7296 9374 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
42fd8198 9375
e1bd7296 9376 prev_stmt_info = new_stmt_info;
42fd8198
IE
9377 }
9378
9379 vec_oprnds0.release ();
9380 vec_oprnds1.release ();
9381
9382 return true;
9383}
ebfd146a 9384
68a0f2ff
RS
9385/* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9386 can handle all live statements in the node. Otherwise return true
9387 if STMT is not live or if vectorizable_live_operation can handle it.
9388 GSI and VEC_STMT are as for vectorizable_live_operation. */
9389
9390static bool
9391can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
1eede195 9392 slp_tree slp_node, stmt_vec_info *vec_stmt,
68435eb2 9393 stmt_vector_for_cost *cost_vec)
68a0f2ff
RS
9394{
9395 if (slp_node)
9396 {
b9787581 9397 stmt_vec_info slp_stmt_info;
68a0f2ff 9398 unsigned int i;
b9787581 9399 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
68a0f2ff 9400 {
68a0f2ff 9401 if (STMT_VINFO_LIVE_P (slp_stmt_info)
b9787581 9402 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
68435eb2 9403 vec_stmt, cost_vec))
68a0f2ff
RS
9404 return false;
9405 }
9406 }
9407 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
68435eb2
RB
9408 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9409 cost_vec))
68a0f2ff
RS
9410 return false;
9411
9412 return true;
9413}
9414
8644a673 9415/* Make sure the statement is vectorizable. */
ebfd146a
IR
9416
9417bool
891ad31c 9418vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
68435eb2 9419 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
ebfd146a 9420{
8644a673 9421 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6585ff8f 9422 vec_info *vinfo = stmt_info->vinfo;
a70d6342 9423 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9424 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9425 bool ok;
363477c0 9426 gimple_seq pattern_def_seq;
ebfd146a 9427
73fbfcad 9428 if (dump_enabled_p ())
ebfd146a 9429 {
78c60e3d
SS
9430 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9431 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9432 }
ebfd146a 9433
1825a1f3 9434 if (gimple_has_volatile_ops (stmt))
b8698a0f 9435 {
73fbfcad 9436 if (dump_enabled_p ())
78c60e3d 9437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9438 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9439
9440 return false;
9441 }
b8698a0f 9442
d54a098e
RS
9443 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9444 && node == NULL
9445 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9446 {
9447 gimple_stmt_iterator si;
9448
9449 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9450 {
9451 gimple *pattern_def_stmt = gsi_stmt (si);
6585ff8f
RS
9452 stmt_vec_info pattern_def_stmt_info
9453 = vinfo->lookup_stmt (gsi_stmt (si));
9454 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9455 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
d54a098e
RS
9456 {
9457 /* Analyze def stmt of STMT if it's a pattern stmt. */
9458 if (dump_enabled_p ())
9459 {
9460 dump_printf_loc (MSG_NOTE, vect_location,
9461 "==> examining pattern def statement: ");
9462 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9463 }
9464
9465 if (!vect_analyze_stmt (pattern_def_stmt,
9466 need_to_vectorize, node, node_instance,
9467 cost_vec))
9468 return false;
9469 }
9470 }
9471 }
9472
b8698a0f 9473 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9474 to include:
9475 - the COND_EXPR which is the loop exit condition
9476 - any LABEL_EXPRs in the loop
b8698a0f 9477 - computations that are used only for array indexing or loop control.
8644a673 9478 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9479 instance, therefore, all the statements are relevant.
ebfd146a 9480
d092494c 9481 Pattern statement needs to be analyzed instead of the original statement
83197f37 9482 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9483 statements. In basic blocks we are called from some SLP instance
9484 traversal; don't analyze pattern stmts instead, as the pattern stmts
9485 will already be part of the SLP instance. */
83197f37 9486
10681ce8 9487 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9488 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9489 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9490 {
9d5e7640 9491 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10681ce8
RS
9492 && pattern_stmt_info
9493 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9494 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9d5e7640 9495 {
83197f37 9496 /* Analyze PATTERN_STMT instead of the original stmt. */
10681ce8
RS
9497 stmt = pattern_stmt_info->stmt;
9498 stmt_info = pattern_stmt_info;
73fbfcad 9499 if (dump_enabled_p ())
9d5e7640 9500 {
78c60e3d
SS
9501 dump_printf_loc (MSG_NOTE, vect_location,
9502 "==> examining pattern statement: ");
9503 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9504 }
9505 }
9506 else
9507 {
73fbfcad 9508 if (dump_enabled_p ())
e645e942 9509 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9510
9d5e7640
IR
9511 return true;
9512 }
8644a673 9513 }
83197f37 9514 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9515 && node == NULL
10681ce8
RS
9516 && pattern_stmt_info
9517 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9518 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
83197f37
IR
9519 {
9520 /* Analyze PATTERN_STMT too. */
73fbfcad 9521 if (dump_enabled_p ())
83197f37 9522 {
78c60e3d
SS
9523 dump_printf_loc (MSG_NOTE, vect_location,
9524 "==> examining pattern statement: ");
9525 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9526 }
9527
10681ce8 9528 if (!vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
68435eb2 9529 node_instance, cost_vec))
83197f37
IR
9530 return false;
9531 }
ebfd146a 9532
8644a673
IR
9533 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9534 {
9535 case vect_internal_def:
9536 break;
ebfd146a 9537
8644a673 9538 case vect_reduction_def:
7c5222ff 9539 case vect_nested_cycle:
14a61437
RB
9540 gcc_assert (!bb_vinfo
9541 && (relevance == vect_used_in_outer
9542 || relevance == vect_used_in_outer_by_reduction
9543 || relevance == vect_used_by_reduction
b28ead45
AH
9544 || relevance == vect_unused_in_scope
9545 || relevance == vect_used_only_live));
8644a673
IR
9546 break;
9547
9548 case vect_induction_def:
e7baeb39
RB
9549 gcc_assert (!bb_vinfo);
9550 break;
9551
8644a673
IR
9552 case vect_constant_def:
9553 case vect_external_def:
9554 case vect_unknown_def_type:
9555 default:
9556 gcc_unreachable ();
9557 }
ebfd146a 9558
8644a673 9559 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9560 {
8644a673 9561 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9562 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9563 || (is_gimple_call (stmt)
9564 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9565 *need_to_vectorize = true;
ebfd146a
IR
9566 }
9567
b1af7da6
RB
9568 if (PURE_SLP_STMT (stmt_info) && !node)
9569 {
9570 dump_printf_loc (MSG_NOTE, vect_location,
9571 "handled only by SLP analysis\n");
9572 return true;
9573 }
9574
9575 ok = true;
9576 if (!bb_vinfo
9577 && (STMT_VINFO_RELEVANT_P (stmt_info)
9578 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
68435eb2
RB
9579 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9580 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9581 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9582 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9583 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9584 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9585 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9586 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9587 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9588 cost_vec)
9589 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9590 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9591 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
b1af7da6
RB
9592 else
9593 {
9594 if (bb_vinfo)
68435eb2
RB
9595 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9596 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9597 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9598 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9599 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9600 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9601 cost_vec)
9602 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9603 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9604 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9605 cost_vec)
9606 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9607 cost_vec));
b1af7da6 9608 }
8644a673
IR
9609
9610 if (!ok)
ebfd146a 9611 {
73fbfcad 9612 if (dump_enabled_p ())
8644a673 9613 {
78c60e3d
SS
9614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9615 "not vectorized: relevant stmt not ");
9616 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9617 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9618 }
b8698a0f 9619
ebfd146a
IR
9620 return false;
9621 }
9622
8644a673
IR
9623 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9624 need extra handling, except for vectorizable reductions. */
68435eb2
RB
9625 if (!bb_vinfo
9626 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9627 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
ebfd146a 9628 {
73fbfcad 9629 if (dump_enabled_p ())
8644a673 9630 {
78c60e3d 9631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9632 "not vectorized: live stmt not supported: ");
78c60e3d 9633 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9634 }
b8698a0f 9635
8644a673 9636 return false;
ebfd146a
IR
9637 }
9638
ebfd146a
IR
9639 return true;
9640}
9641
9642
9643/* Function vect_transform_stmt.
9644
9645 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9646
9647bool
355fe088 9648vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9649 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9650 slp_instance slp_node_instance)
9651{
6585ff8f
RS
9652 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9653 vec_info *vinfo = stmt_info->vinfo;
ebfd146a 9654 bool is_store = false;
1eede195 9655 stmt_vec_info vec_stmt = NULL;
ebfd146a 9656 bool done;
ebfd146a 9657
fce57248 9658 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
1eede195 9659 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9660
e57d9a82
RB
9661 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9662 && nested_in_vect_loop_p
9663 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9664 stmt));
9665
ebfd146a
IR
9666 switch (STMT_VINFO_TYPE (stmt_info))
9667 {
9668 case type_demotion_vec_info_type:
ebfd146a 9669 case type_promotion_vec_info_type:
ebfd146a 9670 case type_conversion_vec_info_type:
68435eb2 9671 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9672 gcc_assert (done);
9673 break;
9674
9675 case induc_vec_info_type:
68435eb2 9676 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9677 gcc_assert (done);
9678 break;
9679
9dc3f7de 9680 case shift_vec_info_type:
68435eb2 9681 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9dc3f7de
IR
9682 gcc_assert (done);
9683 break;
9684
ebfd146a 9685 case op_vec_info_type:
68435eb2 9686 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9687 gcc_assert (done);
9688 break;
9689
9690 case assignment_vec_info_type:
68435eb2 9691 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a
IR
9692 gcc_assert (done);
9693 break;
9694
9695 case load_vec_info_type:
b8698a0f 9696 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9697 slp_node_instance, NULL);
ebfd146a
IR
9698 gcc_assert (done);
9699 break;
9700
9701 case store_vec_info_type:
68435eb2 9702 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
ebfd146a 9703 gcc_assert (done);
0d0293ac 9704 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9705 {
9706 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9707 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
9708 one are skipped, and their vec_stmt_info shouldn't be freed
9709 meanwhile. */
0d0293ac 9710 *grouped_store = true;
bffb8014 9711 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2c53b149 9712 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
ebfd146a 9713 is_store = true;
f307441a 9714 }
ebfd146a
IR
9715 else
9716 is_store = true;
9717 break;
9718
9719 case condition_vec_info_type:
68435eb2 9720 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
ebfd146a
IR
9721 gcc_assert (done);
9722 break;
9723
42fd8198 9724 case comparison_vec_info_type:
68435eb2 9725 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
42fd8198
IE
9726 gcc_assert (done);
9727 break;
9728
ebfd146a 9729 case call_vec_info_type:
68435eb2 9730 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
039d9ea1 9731 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9732 break;
9733
0136f8f0 9734 case call_simd_clone_vec_info_type:
68435eb2 9735 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
0136f8f0
AH
9736 stmt = gsi_stmt (*gsi);
9737 break;
9738
ebfd146a 9739 case reduc_vec_info_type:
891ad31c 9740 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
68435eb2 9741 slp_node_instance, NULL);
ebfd146a
IR
9742 gcc_assert (done);
9743 break;
9744
9745 default:
9746 if (!STMT_VINFO_LIVE_P (stmt_info))
9747 {
73fbfcad 9748 if (dump_enabled_p ())
78c60e3d 9749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9750 "stmt not supported.\n");
ebfd146a
IR
9751 gcc_unreachable ();
9752 }
9753 }
9754
225ce44b
RB
9755 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9756 This would break hybrid SLP vectorization. */
9757 if (slp_node)
d90f8440 9758 gcc_assert (!vec_stmt
1eede195 9759 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
225ce44b 9760
ebfd146a
IR
9761 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9762 is being vectorized, but outside the immediately enclosing loop. */
9763 if (vec_stmt
e57d9a82 9764 && nested_p
ebfd146a
IR
9765 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9766 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9767 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9768 vect_used_in_outer_by_reduction))
ebfd146a 9769 {
a70d6342
IR
9770 struct loop *innerloop = LOOP_VINFO_LOOP (
9771 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9772 imm_use_iterator imm_iter;
9773 use_operand_p use_p;
9774 tree scalar_dest;
ebfd146a 9775
73fbfcad 9776 if (dump_enabled_p ())
78c60e3d 9777 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9778 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9779
9780 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9781 (to be used when vectorizing outer-loop stmts that use the DEF of
9782 STMT). */
9783 if (gimple_code (stmt) == GIMPLE_PHI)
9784 scalar_dest = PHI_RESULT (stmt);
9785 else
9786 scalar_dest = gimple_assign_lhs (stmt);
9787
9788 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6585ff8f
RS
9789 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9790 {
9791 stmt_vec_info exit_phi_info
9792 = vinfo->lookup_stmt (USE_STMT (use_p));
9793 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9794 }
ebfd146a
IR
9795 }
9796
9797 /* Handle stmts whose DEF is used outside the loop-nest that is
9798 being vectorized. */
68a0f2ff 9799 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9800 {
68435eb2 9801 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
ebfd146a
IR
9802 gcc_assert (done);
9803 }
9804
9805 if (vec_stmt)
83197f37 9806 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9807
b8698a0f 9808 return is_store;
ebfd146a
IR
9809}
9810
9811
b8698a0f 9812/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
9813 stmt_vec_info. */
9814
9815void
355fe088 9816vect_remove_stores (gimple *first_stmt)
ebfd146a 9817{
355fe088 9818 gimple *next = first_stmt;
ebfd146a
IR
9819 gimple_stmt_iterator next_si;
9820
9821 while (next)
9822 {
78048b1c
JJ
9823 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9824
bffb8014 9825 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
78048b1c
JJ
9826 if (is_pattern_stmt_p (stmt_info))
9827 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
9828 /* Free the attached stmt_vec_info and remove the stmt. */
9829 next_si = gsi_for_stmt (next);
3d3f2249 9830 unlink_stmt_vdef (next);
ebfd146a 9831 gsi_remove (&next_si, true);
3d3f2249 9832 release_defs (next);
ebfd146a
IR
9833 free_stmt_vec_info (next);
9834 next = tmp;
9835 }
9836}
9837
9838
9839/* Function new_stmt_vec_info.
9840
9841 Create and initialize a new stmt_vec_info struct for STMT. */
9842
9843stmt_vec_info
310213d4 9844new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
9845{
9846 stmt_vec_info res;
dbe1b846 9847 res = (_stmt_vec_info *) xcalloc (1, sizeof (struct _stmt_vec_info));
ebfd146a
IR
9848
9849 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9850 STMT_VINFO_STMT (res) = stmt;
310213d4 9851 res->vinfo = vinfo;
8644a673 9852 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
9853 STMT_VINFO_LIVE_P (res) = false;
9854 STMT_VINFO_VECTYPE (res) = NULL;
9855 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 9856 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a 9857 STMT_VINFO_IN_PATTERN_P (res) = false;
363477c0 9858 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 9859 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 9860 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
7e16ce79 9861 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
ebfd146a 9862
ebfd146a
IR
9863 if (gimple_code (stmt) == GIMPLE_PHI
9864 && is_loop_header_bb_p (gimple_bb (stmt)))
9865 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9866 else
8644a673
IR
9867 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9868
9771b263 9869 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 9870 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
9871 STMT_VINFO_NUM_SLP_USES (res) = 0;
9872
2c53b149
RB
9873 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9874 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9875 res->size = 0; /* GROUP_SIZE */
9876 res->store_count = 0; /* GROUP_STORE_COUNT */
9877 res->gap = 0; /* GROUP_GAP */
9878 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
ebfd146a 9879
ca823c85
RB
9880 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9881 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9882
ebfd146a
IR
9883 return res;
9884}
9885
9886
f8c0baaf 9887/* Set the current stmt_vec_info vector to V. */
ebfd146a
IR
9888
9889void
f8c0baaf 9890set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
ebfd146a 9891{
f8c0baaf 9892 stmt_vec_info_vec = v;
ebfd146a
IR
9893}
9894
f8c0baaf 9895/* Free the stmt_vec_info entries in V and release V. */
ebfd146a
IR
9896
9897void
f8c0baaf 9898free_stmt_vec_infos (vec<stmt_vec_info> *v)
ebfd146a 9899{
93675444 9900 unsigned int i;
3161455c 9901 stmt_vec_info info;
f8c0baaf 9902 FOR_EACH_VEC_ELT (*v, i, info)
dbe1b846 9903 if (info != NULL_STMT_VEC_INFO)
3161455c 9904 free_stmt_vec_info (STMT_VINFO_STMT (info));
f8c0baaf
RB
9905 if (v == stmt_vec_info_vec)
9906 stmt_vec_info_vec = NULL;
9907 v->release ();
ebfd146a
IR
9908}
9909
9910
9911/* Free stmt vectorization related info. */
9912
9913void
355fe088 9914free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
9915{
9916 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9917
9918 if (!stmt_info)
9919 return;
9920
78048b1c
JJ
9921 /* Check if this statement has a related "pattern stmt"
9922 (introduced by the vectorizer during the pattern recognition
9923 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9924 too. */
9925 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9926 {
e3947d80
RS
9927 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9928 for (gimple_stmt_iterator si = gsi_start (seq);
9929 !gsi_end_p (si); gsi_next (&si))
9930 {
9931 gimple *seq_stmt = gsi_stmt (si);
9932 gimple_set_bb (seq_stmt, NULL);
9933 tree lhs = gimple_get_lhs (seq_stmt);
9934 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9935 release_ssa_name (lhs);
9936 free_stmt_vec_info (seq_stmt);
9937 }
10681ce8
RS
9938 stmt_vec_info patt_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9939 if (patt_stmt_info)
78048b1c 9940 {
10681ce8
RS
9941 gimple_set_bb (patt_stmt_info->stmt, NULL);
9942 tree lhs = gimple_get_lhs (patt_stmt_info->stmt);
e6f5c25d 9943 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 9944 release_ssa_name (lhs);
10681ce8 9945 free_stmt_vec_info (patt_stmt_info);
78048b1c
JJ
9946 }
9947 }
9948
9771b263 9949 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 9950 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
9951 set_vinfo_for_stmt (stmt, NULL);
9952 free (stmt_info);
9953}
9954
9955
bb67d9c7 9956/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 9957
bb67d9c7 9958 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
9959 by the target. */
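/* An illustrative note, not part of the original source: with SIZE == 16
   and SCALAR_TYPE a 2-byte short, NUNITS becomes 16 / 2 == 8 and the
   result is a 16-byte vector of 8 shorts, provided the target supports
   such a mode; with SIZE == 0 the mode chosen by
   targetm.vectorize.preferred_simd_mode is used instead.  */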
9960
c803b2a9 9961tree
86e36728 9962get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
ebfd146a 9963{
c7d97b28 9964 tree orig_scalar_type = scalar_type;
3bd8f481 9965 scalar_mode inner_mode;
ef4bddc2 9966 machine_mode simd_mode;
86e36728 9967 poly_uint64 nunits;
ebfd146a
IR
9968 tree vectype;
9969
3bd8f481
RS
9970 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9971 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
ebfd146a
IR
9972 return NULL_TREE;
9973
3bd8f481 9974 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
48f2e373 9975
7b7b1813
RG
9976 /* For vector types of elements whose mode precision doesn't
9977 match their type's precision we use an element type of mode
9978 precision. The vectorization routines will have to make sure
48f2e373
RB
9979 they support the proper result truncation/extension.
9980 We also make sure to build vector types with INTEGER_TYPE
9981 component type only. */
6d7971b8 9982 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
9983 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9984 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
9985 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9986 TYPE_UNSIGNED (scalar_type));
6d7971b8 9987
ccbf5bb4
RG
9988 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9989 When the component mode passes the above test simply use a type
9990 corresponding to that mode. The theory is that any use that
9991 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 9992 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 9993 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
9994 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9995
9996 /* We can't build a vector type of elements with alignment bigger than
9997 their size. */
dfc2e2ac 9998 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
9999 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10000 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 10001
dfc2e2ac
RB
10002 /* If we fell back to using the mode, fail if there was
10003 no scalar type for it. */
10004 if (scalar_type == NULL_TREE)
10005 return NULL_TREE;
10006
bb67d9c7
RG
10007 /* If no size was supplied use the mode the target prefers. Otherwise
10008 look up a vector mode of the specified size. */
86e36728 10009 if (known_eq (size, 0U))
bb67d9c7 10010 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
86e36728
RS
10011 else if (!multiple_p (size, nbytes, &nunits)
10012 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9da15d40 10013 return NULL_TREE;
4c8fd8ac 10014 /* NOTE: nunits == 1 is allowed to support single element vector types. */
86e36728 10015 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
cc4b5170 10016 return NULL_TREE;
ebfd146a
IR
10017
10018 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
10019
10020 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10021 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 10022 return NULL_TREE;
ebfd146a 10023
c7d97b28
RB
10024 /* Re-attach the address-space qualifier if we canonicalized the scalar
10025 type. */
10026 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10027 return build_qualified_type
10028 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10029
ebfd146a
IR
10030 return vectype;
10031}
10032
86e36728 10033poly_uint64 current_vector_size;
bb67d9c7
RG
10034
10035/* Function get_vectype_for_scalar_type.
10036
10037 Returns the vector type corresponding to SCALAR_TYPE as supported
10038 by the target. */
10039
10040tree
10041get_vectype_for_scalar_type (tree scalar_type)
10042{
10043 tree vectype;
10044 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10045 current_vector_size);
10046 if (vectype
86e36728 10047 && known_eq (current_vector_size, 0U))
bb67d9c7
RG
10048 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10049 return vectype;
10050}
10051
42fd8198
IE
10052/* Function get_mask_type_for_scalar_type.
10053
10054 Returns the mask type corresponding to a result of comparison
10055 of vectors of the specified SCALAR_TYPE, as supported by the target. */
10056
10057tree
10058get_mask_type_for_scalar_type (tree scalar_type)
10059{
10060 tree vectype = get_vectype_for_scalar_type (scalar_type);
10061
10062 if (!vectype)
10063 return NULL;
10064
10065 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10066 current_vector_size);
10067}
10068
b690cc0f
RG
10069/* Function get_same_sized_vectype
10070
10071 Returns a vector type corresponding to SCALAR_TYPE of size
10072 VECTOR_TYPE if supported by the target. */
10073
10074tree
bb67d9c7 10075get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 10076{
2568d8a1 10077 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9f47c7e5
IE
10078 return build_same_sized_truth_vector_type (vector_type);
10079
bb67d9c7
RG
10080 return get_vectype_for_scalar_type_and_size
10081 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
10082}
10083
ebfd146a
IR
10084/* Function vect_is_simple_use.
10085
10086 Input:
81c40241
RB
10087 VINFO - the vect info of the loop or basic block that is being vectorized.
10088 OPERAND - operand in the loop or bb.
10089 Output:
fef96d8e
RS
10090 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10091 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10092 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10093 the definition could be anywhere in the function
81c40241 10094 DT - the type of definition
ebfd146a
IR
10095
10096 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 10097 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 10098 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 10099 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
10100 is the case in reduction/induction computations).
10101 For basic blocks, supportable operands are constants and bb invariants.
10102 For now, operands defined outside the basic block are not supported. */
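/* A minimal usage sketch (illustrative only, not part of the original
   source).  The optional stmt-info and stmt outputs are passed as NULL:

     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &dt, NULL, NULL))
       return false;

   A false return means OP has an unsupported definition; otherwise DT
   tells the caller whether OP is e.g. vect_constant_def, vect_external_def
   or vect_internal_def.  */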
ebfd146a
IR
10103
10104bool
894dd753 10105vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
fef96d8e 10106 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
b8698a0f 10107{
fef96d8e
RS
10108 if (def_stmt_info_out)
10109 *def_stmt_info_out = NULL;
894dd753
RS
10110 if (def_stmt_out)
10111 *def_stmt_out = NULL;
3fc356dc 10112 *dt = vect_unknown_def_type;
b8698a0f 10113
73fbfcad 10114 if (dump_enabled_p ())
ebfd146a 10115 {
78c60e3d
SS
10116 dump_printf_loc (MSG_NOTE, vect_location,
10117 "vect_is_simple_use: operand ");
30f502ed
RB
10118 if (TREE_CODE (operand) == SSA_NAME
10119 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10120 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10121 else
10122 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
ebfd146a 10123 }
b8698a0f 10124
b758f602 10125 if (CONSTANT_CLASS_P (operand))
30f502ed
RB
10126 *dt = vect_constant_def;
10127 else if (is_gimple_min_invariant (operand))
10128 *dt = vect_external_def;
10129 else if (TREE_CODE (operand) != SSA_NAME)
10130 *dt = vect_unknown_def_type;
10131 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
8644a673 10132 *dt = vect_external_def;
ebfd146a
IR
10133 else
10134 {
30f502ed 10135 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
c98d0595
RS
10136 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10137 if (!stmt_vinfo)
30f502ed
RB
10138 *dt = vect_external_def;
10139 else
0f8c840c 10140 {
30f502ed
RB
10141 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10142 {
10681ce8
RS
10143 stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10144 def_stmt = stmt_vinfo->stmt;
30f502ed
RB
10145 }
10146 switch (gimple_code (def_stmt))
10147 {
10148 case GIMPLE_PHI:
10149 case GIMPLE_ASSIGN:
10150 case GIMPLE_CALL:
10151 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10152 break;
10153 default:
10154 *dt = vect_unknown_def_type;
10155 break;
10156 }
fef96d8e
RS
10157 if (def_stmt_info_out)
10158 *def_stmt_info_out = stmt_vinfo;
0f8c840c 10159 }
30f502ed
RB
10160 if (def_stmt_out)
10161 *def_stmt_out = def_stmt;
ebfd146a
IR
10162 }
10163
2e8ab70c
RB
10164 if (dump_enabled_p ())
10165 {
30f502ed 10166 dump_printf (MSG_NOTE, ", type of def: ");
2e8ab70c
RB
10167 switch (*dt)
10168 {
10169 case vect_uninitialized_def:
10170 dump_printf (MSG_NOTE, "uninitialized\n");
10171 break;
10172 case vect_constant_def:
10173 dump_printf (MSG_NOTE, "constant\n");
10174 break;
10175 case vect_external_def:
10176 dump_printf (MSG_NOTE, "external\n");
10177 break;
10178 case vect_internal_def:
10179 dump_printf (MSG_NOTE, "internal\n");
10180 break;
10181 case vect_induction_def:
10182 dump_printf (MSG_NOTE, "induction\n");
10183 break;
10184 case vect_reduction_def:
10185 dump_printf (MSG_NOTE, "reduction\n");
10186 break;
10187 case vect_double_reduction_def:
10188 dump_printf (MSG_NOTE, "double reduction\n");
10189 break;
10190 case vect_nested_cycle:
10191 dump_printf (MSG_NOTE, "nested cycle\n");
10192 break;
10193 case vect_unknown_def_type:
10194 dump_printf (MSG_NOTE, "unknown\n");
10195 break;
10196 }
10197 }
10198
81c40241 10199 if (*dt == vect_unknown_def_type)
ebfd146a 10200 {
73fbfcad 10201 if (dump_enabled_p ())
78c60e3d 10202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 10203 "Unsupported pattern.\n");
ebfd146a
IR
10204 return false;
10205 }
10206
ebfd146a
IR
10207 return true;
10208}
10209
81c40241 10210/* Function vect_is_simple_use.
b690cc0f 10211
81c40241 10212 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
10213 type of OPERAND and stores it to *VECTYPE. If the definition of
10214 OPERAND is vect_uninitialized_def, vect_constant_def or
10215 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10216 is responsible to compute the best suited vector type for the
10217 scalar operand. */
10218
10219bool
894dd753 10220vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
fef96d8e
RS
10221 tree *vectype, stmt_vec_info *def_stmt_info_out,
10222 gimple **def_stmt_out)
b690cc0f 10223{
fef96d8e 10224 stmt_vec_info def_stmt_info;
894dd753 10225 gimple *def_stmt;
fef96d8e 10226 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
b690cc0f
RG
10227 return false;
10228
894dd753
RS
10229 if (def_stmt_out)
10230 *def_stmt_out = def_stmt;
fef96d8e
RS
10231 if (def_stmt_info_out)
10232 *def_stmt_info_out = def_stmt_info;
894dd753 10233
b690cc0f
RG
10234 /* Now get a vector type if the def is internal, otherwise supply
10235 NULL_TREE and leave it up to the caller to figure out a proper
10236 type for the use stmt. */
10237 if (*dt == vect_internal_def
10238 || *dt == vect_induction_def
10239 || *dt == vect_reduction_def
10240 || *dt == vect_double_reduction_def
10241 || *dt == vect_nested_cycle)
10242 {
fef96d8e 10243 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
b690cc0f 10244 gcc_assert (*vectype != NULL_TREE);
30f502ed
RB
10245 if (dump_enabled_p ())
10246 {
10247 dump_printf_loc (MSG_NOTE, vect_location,
10248 "vect_is_simple_use: vectype ");
10249 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10250 dump_printf (MSG_NOTE, "\n");
10251 }
b690cc0f
RG
10252 }
10253 else if (*dt == vect_uninitialized_def
10254 || *dt == vect_constant_def
10255 || *dt == vect_external_def)
10256 *vectype = NULL_TREE;
10257 else
10258 gcc_unreachable ();
10259
10260 return true;
10261}
10262
ebfd146a
IR
10263
10264/* Function supportable_widening_operation
10265
b8698a0f
L
10266 Check whether an operation represented by the code CODE is a
10267 widening operation that is supported by the target platform in
b690cc0f
RG
10268 vector form (i.e., when operating on arguments of type VECTYPE_IN
10269 producing a result of type VECTYPE_OUT).
b8698a0f 10270
1bda738b
JJ
10271 Widening operations we currently support are NOP (CONVERT), FLOAT,
10272 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10273 are supported by the target platform either directly (via vector
10274 tree-codes), or via target builtins.
ebfd146a
IR
10275
10276 Output:
b8698a0f
L
10277 - CODE1 and CODE2 are codes of vector operations to be used when
10278 vectorizing the operation, if available.
ebfd146a
IR
10279 - MULTI_STEP_CVT determines the number of required intermediate steps in
10280 case of multi-step conversion (like char->short->int - in that case
10281 MULTI_STEP_CVT will be 1).
b8698a0f
L
10282 - INTERM_TYPES contains the intermediate type required to perform the
10283 widening operation (short in the above example). */
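/* An illustrative example, not part of the original source: for a plain
   conversion (CASE_CONVERT) from a vector of chars to vectors of ints,
   CODE1/CODE2 become VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, and the
   char->short->int case additionally records a vector-of-short type in
   INTERM_TYPES with *MULTI_STEP_CVT == 1, provided the target implements
   the corresponding optabs.  */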
ebfd146a
IR
10284
10285bool
355fe088 10286supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 10287 tree vectype_out, tree vectype_in,
ebfd146a
IR
10288 enum tree_code *code1, enum tree_code *code2,
10289 int *multi_step_cvt,
9771b263 10290 vec<tree> *interm_types)
ebfd146a
IR
10291{
10292 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10293 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 10294 struct loop *vect_loop = NULL;
ef4bddc2 10295 machine_mode vec_mode;
81f40b79 10296 enum insn_code icode1, icode2;
ebfd146a 10297 optab optab1, optab2;
b690cc0f
RG
10298 tree vectype = vectype_in;
10299 tree wide_vectype = vectype_out;
ebfd146a 10300 enum tree_code c1, c2;
4a00c761
JJ
10301 int i;
10302 tree prev_type, intermediate_type;
ef4bddc2 10303 machine_mode intermediate_mode, prev_mode;
4a00c761 10304 optab optab3, optab4;
ebfd146a 10305
4a00c761 10306 *multi_step_cvt = 0;
4ef69dfc
IR
10307 if (loop_info)
10308 vect_loop = LOOP_VINFO_LOOP (loop_info);
10309
ebfd146a
IR
10310 switch (code)
10311 {
10312 case WIDEN_MULT_EXPR:
6ae6116f
RH
10313 /* The result of a vectorized widening operation usually requires
10314 two vectors (because the widened results do not fit into one vector).
10315 The generated vector results would normally be expected to be
10316 generated in the same order as in the original scalar computation,
10317 i.e. if 8 results are generated in each vector iteration, they are
10318 to be organized as follows:
10319 vect1: [res1,res2,res3,res4],
10320 vect2: [res5,res6,res7,res8].
10321
10322 However, in the special case that the result of the widening
10323 operation is used in a reduction computation only, the order doesn't
10324 matter (because when vectorizing a reduction we change the order of
10325 the computation). Some targets can take advantage of this and
10326 generate more efficient code. For example, targets like Altivec,
10327 that support widen_mult using a sequence of {mult_even,mult_odd}
10328 generate the following vectors:
10329 vect1: [res1,res3,res5,res7],
10330 vect2: [res2,res4,res6,res8].
10331
10332 When vectorizing outer-loops, we execute the inner-loop sequentially
10333 (each vectorized inner-loop iteration contributes to VF outer-loop
10334 iterations in parallel). We therefore don't allow changing the
10335 order of the computation in the inner-loop during outer-loop
10336 vectorization. */
10337 /* TODO: Another case in which order doesn't *really* matter is when we
10338 widen and then contract again, e.g. (short)((int)x * y >> 8).
10339 Normally, pack_trunc performs an even/odd permute, whereas the
10340 repack from an even/odd expansion would be an interleave, which
10341 would be significantly simpler for e.g. AVX2. */
10342 /* In any case, in order to avoid duplicating the code below, recurse
10343 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10344 are properly set up for the caller. If we fail, we'll continue with
10345 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10346 if (vect_loop
10347 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10348 && !nested_in_vect_loop_p (vect_loop, stmt)
10349 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10350 stmt, vectype_out, vectype_in,
a86ec597
RH
10351 code1, code2, multi_step_cvt,
10352 interm_types))
ebc047a2
CH
10353 {
10354 /* Elements in a vector with vect_used_by_reduction property cannot
10355 be reordered if the use chain with this property does not have the
10356 same operation. One such an example is s += a * b, where elements
10357 in a and b cannot be reordered. Here we check if the vector defined
10358 by STMT is only directly used in the reduction statement. */
0d0a4e20
RS
10359 tree lhs = gimple_assign_lhs (stmt);
10360 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10361 if (use_stmt_info
10362 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10363 return true;
ebc047a2 10364 }
4a00c761
JJ
10365 c1 = VEC_WIDEN_MULT_LO_EXPR;
10366 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
10367 break;
10368
81c40241
RB
10369 case DOT_PROD_EXPR:
10370 c1 = DOT_PROD_EXPR;
10371 c2 = DOT_PROD_EXPR;
10372 break;
10373
10374 case SAD_EXPR:
10375 c1 = SAD_EXPR;
10376 c2 = SAD_EXPR;
10377 break;
10378
6ae6116f
RH
10379 case VEC_WIDEN_MULT_EVEN_EXPR:
10380 /* Support the recursion induced just above. */
10381 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10382 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10383 break;
10384
36ba4aae 10385 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
10386 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10387 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
10388 break;
10389
ebfd146a 10390 CASE_CONVERT:
4a00c761
JJ
10391 c1 = VEC_UNPACK_LO_EXPR;
10392 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
10393 break;
10394
10395 case FLOAT_EXPR:
4a00c761
JJ
10396 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10397 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
10398 break;
10399
10400 case FIX_TRUNC_EXPR:
1bda738b
JJ
10401 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10402 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10403 break;
ebfd146a
IR
10404
10405 default:
10406 gcc_unreachable ();
10407 }
10408
6ae6116f 10409 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 10410 std::swap (c1, c2);
4a00c761 10411
ebfd146a
IR
10412 if (code == FIX_TRUNC_EXPR)
10413 {
10414 /* The signedness is determined from output operand. */
b690cc0f
RG
10415 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10416 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
10417 }
10418 else
10419 {
10420 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10421 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10422 }
10423
10424 if (!optab1 || !optab2)
10425 return false;
10426
10427 vec_mode = TYPE_MODE (vectype);
947131ba
RS
10428 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10429 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10430 return false;
10431
4a00c761
JJ
10432 *code1 = c1;
10433 *code2 = c2;
10434
10435 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10436 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
10437 /* For scalar masks we may have different boolean
10438 vector types having the same QImode. Thus we
10439 add an additional check on the number of elements. */
10440 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10441 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10442 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761 10443
b8698a0f 10444 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 10445 types. */
ebfd146a 10446
4a00c761
JJ
10447 prev_type = vectype;
10448 prev_mode = vec_mode;
b8698a0f 10449
4a00c761
JJ
10450 if (!CONVERT_EXPR_CODE_P (code))
10451 return false;
b8698a0f 10452
4a00c761
JJ
10453 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10454 intermediate steps in promotion sequence. We try
10455 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10456 not. */
9771b263 10457 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10458 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10459 {
10460 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10461 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10462 {
7cfb4d93 10463 intermediate_type = vect_halve_mask_nunits (prev_type);
3ae0661a
IE
10464 if (intermediate_mode != TYPE_MODE (intermediate_type))
10465 return false;
10466 }
10467 else
10468 intermediate_type
10469 = lang_hooks.types.type_for_mode (intermediate_mode,
10470 TYPE_UNSIGNED (prev_type));
10471
4a00c761
JJ
10472 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10473 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10474
10475 if (!optab3 || !optab4
10476 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10477 || insn_data[icode1].operand[0].mode != intermediate_mode
10478 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10479 || insn_data[icode2].operand[0].mode != intermediate_mode
10480 || ((icode1 = optab_handler (optab3, intermediate_mode))
10481 == CODE_FOR_nothing)
10482 || ((icode2 = optab_handler (optab4, intermediate_mode))
10483 == CODE_FOR_nothing))
10484 break;
ebfd146a 10485
9771b263 10486 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10487 (*multi_step_cvt)++;
10488
10489 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10490 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff 10491 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10492 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10493 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
4a00c761
JJ
10494
10495 prev_type = intermediate_type;
10496 prev_mode = intermediate_mode;
ebfd146a
IR
10497 }
10498
9771b263 10499 interm_types->release ();
4a00c761 10500 return false;
ebfd146a
IR
10501}
10502
10503
10504/* Function supportable_narrowing_operation
10505
b8698a0f
L
10506 Check whether an operation represented by the code CODE is a
10507 narrowing operation that is supported by the target platform in
b690cc0f
RG
10508 vector form (i.e., when operating on arguments of type VECTYPE_IN
10509 and producing a result of type VECTYPE_OUT).
b8698a0f 10510
1bda738b
JJ
10511 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10512 and FLOAT. This function checks if these operations are supported by
ebfd146a
IR
10513 the target platform directly via vector tree-codes.
10514
10515 Output:
b8698a0f
L
10516 - CODE1 is the code of a vector operation to be used when
10517 vectorizing the operation, if available.
ebfd146a
IR
10518 - MULTI_STEP_CVT determines the number of required intermediate steps in
10519 case of multi-step conversion (like int->short->char - in that case
10520 MULTI_STEP_CVT will be 1).
10521 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 10522 narrowing operation (short in the above example). */
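/* An illustrative example, not part of the original source: for a plain
   conversion (CASE_CONVERT) narrowing a vector of ints to vectors of chars,
   CODE1 becomes VEC_PACK_TRUNC_EXPR, and the int->short->char case records
   a vector-of-short type in INTERM_TYPES with *MULTI_STEP_CVT == 1,
   provided the target implements the corresponding optabs.  */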
ebfd146a
IR
10523
10524bool
10525supportable_narrowing_operation (enum tree_code code,
b690cc0f 10526 tree vectype_out, tree vectype_in,
ebfd146a 10527 enum tree_code *code1, int *multi_step_cvt,
9771b263 10528 vec<tree> *interm_types)
ebfd146a 10529{
ef4bddc2 10530 machine_mode vec_mode;
ebfd146a
IR
10531 enum insn_code icode1;
10532 optab optab1, interm_optab;
b690cc0f
RG
10533 tree vectype = vectype_in;
10534 tree narrow_vectype = vectype_out;
ebfd146a 10535 enum tree_code c1;
3ae0661a 10536 tree intermediate_type, prev_type;
ef4bddc2 10537 machine_mode intermediate_mode, prev_mode;
ebfd146a 10538 int i;
4a00c761 10539 bool uns;
ebfd146a 10540
4a00c761 10541 *multi_step_cvt = 0;
ebfd146a
IR
10542 switch (code)
10543 {
10544 CASE_CONVERT:
10545 c1 = VEC_PACK_TRUNC_EXPR;
10546 break;
10547
10548 case FIX_TRUNC_EXPR:
10549 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10550 break;
10551
10552 case FLOAT_EXPR:
1bda738b
JJ
10553 c1 = VEC_PACK_FLOAT_EXPR;
10554 break;
ebfd146a
IR
10555
10556 default:
10557 gcc_unreachable ();
10558 }
10559
10560 if (code == FIX_TRUNC_EXPR)
10561 /* The signedness is determined from output operand. */
b690cc0f 10562 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
10563 else
10564 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10565
10566 if (!optab1)
10567 return false;
10568
10569 vec_mode = TYPE_MODE (vectype);
947131ba 10570 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
10571 return false;
10572
4a00c761
JJ
10573 *code1 = c1;
10574
10575 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
10576 /* For scalar masks we may have different boolean
10577 vector types having the same QImode. Thus we
10578 add an additional check on the number of elements. */
10579 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10580 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10581 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 10582
1bda738b
JJ
10583 if (code == FLOAT_EXPR)
10584 return false;
10585
ebfd146a
IR
10586 /* Check if it's a multi-step conversion that can be done using intermediate
10587 types. */
4a00c761 10588 prev_mode = vec_mode;
3ae0661a 10589 prev_type = vectype;
4a00c761
JJ
10590 if (code == FIX_TRUNC_EXPR)
10591 uns = TYPE_UNSIGNED (vectype_out);
10592 else
10593 uns = TYPE_UNSIGNED (vectype);
10594
10595 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10596 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10597 costly than signed. */
10598 if (code == FIX_TRUNC_EXPR && uns)
10599 {
10600 enum insn_code icode2;
10601
10602 intermediate_type
10603 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10604 interm_optab
10605 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 10606 if (interm_optab != unknown_optab
4a00c761
JJ
10607 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10608 && insn_data[icode1].operand[0].mode
10609 == insn_data[icode2].operand[0].mode)
10610 {
10611 uns = false;
10612 optab1 = interm_optab;
10613 icode1 = icode2;
10614 }
10615 }
ebfd146a 10616
4a00c761
JJ
10617 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10618 intermediate steps in promotion sequence. We try
10619 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 10620 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
10621 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10622 {
10623 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
10624 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10625 {
7cfb4d93 10626 intermediate_type = vect_double_mask_nunits (prev_type);
3ae0661a 10627 if (intermediate_mode != TYPE_MODE (intermediate_type))
7cfb4d93 10628 return false;
3ae0661a
IE
10629 }
10630 else
10631 intermediate_type
10632 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
10633 interm_optab
10634 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10635 optab_default);
10636 if (!interm_optab
10637 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10638 || insn_data[icode1].operand[0].mode != intermediate_mode
10639 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10640 == CODE_FOR_nothing))
10641 break;
10642
9771b263 10643 interm_types->quick_push (intermediate_type);
4a00c761
JJ
10644 (*multi_step_cvt)++;
10645
10646 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff 10647 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
928686b1
RS
10648 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10649 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
10650
10651 prev_mode = intermediate_mode;
3ae0661a 10652 prev_type = intermediate_type;
4a00c761 10653 optab1 = interm_optab;
ebfd146a
IR
10654 }
10655
9771b263 10656 interm_types->release ();
4a00c761 10657 return false;
ebfd146a 10658}
7cfb4d93
RS
10659
10660/* Generate and return a statement that sets vector mask MASK such that
10661 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10662
10663gcall *
10664vect_gen_while (tree mask, tree start_index, tree end_index)
10665{
10666 tree cmp_type = TREE_TYPE (start_index);
10667 tree mask_type = TREE_TYPE (mask);
10668 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10669 cmp_type, mask_type,
10670 OPTIMIZE_FOR_SPEED));
10671 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10672 start_index, end_index,
10673 build_zero_cst (mask_type));
10674 gimple_call_set_lhs (call, mask);
10675 return call;
10676}
535e7c11
RS
10677
10678/* Generate a vector mask of type MASK_TYPE for which index I is false iff
10679 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
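/* An illustrative example, not part of the original source: with the same
   arguments as above (4 elements, START_INDEX == 13, END_INDEX == 16) the
   returned mask is { 0, 0, 0, 1 }, i.e. the BIT_NOT_EXPR of the
   vect_gen_while result.  */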
10680
10681tree
10682vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10683 tree end_index)
10684{
10685 tree tmp = make_ssa_name (mask_type);
10686 gcall *call = vect_gen_while (tmp, start_index, end_index);
10687 gimple_seq_add_stmt (seq, call);
10688 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10689}
1f3cb663
RS
10690
10691/* Try to compute the vector types required to vectorize STMT_INFO,
10692 returning true on success and false if vectorization isn't possible.
10693
10694 On success:
10695
10696 - Set *STMT_VECTYPE_OUT to:
10697 - NULL_TREE if the statement doesn't need to be vectorized;
10698 - boolean_type_node if the statement is a boolean operation whose
10699 vector type can only be determined once all the other vector types
10700 are known; and
10701 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10702
10703 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10704 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10705 statement does not help to determine the overall number of units. */
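/* An illustrative example, not part of the original source, assuming a
   target with 16-byte vectors: for a widening assignment such as
   int_x = (int) short_y, *STMT_VECTYPE_OUT would be a vector of 4 ints
   while *NUNITS_VECTYPE_OUT would be a vector of 8 shorts, since the
   number of units is derived from the smallest scalar type involved.  */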
10706
10707bool
10708vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10709 tree *stmt_vectype_out,
10710 tree *nunits_vectype_out)
10711{
10712 gimple *stmt = stmt_info->stmt;
10713
10714 *stmt_vectype_out = NULL_TREE;
10715 *nunits_vectype_out = NULL_TREE;
10716
10717 if (gimple_get_lhs (stmt) == NULL_TREE
10718 /* MASK_STORE has no lhs, but is ok. */
10719 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10720 {
10721 if (is_a <gcall *> (stmt))
10722 {
10723 /* Ignore calls with no lhs. These must be calls to
10724 #pragma omp simd functions, and what vectorization factor
10725 it really needs can't be determined until
10726 vectorizable_simd_clone_call. */
10727 if (dump_enabled_p ())
10728 dump_printf_loc (MSG_NOTE, vect_location,
10729 "defer to SIMD clone analysis.\n");
10730 return true;
10731 }
10732
10733 if (dump_enabled_p ())
10734 {
10735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10736 "not vectorized: irregular stmt.");
10737 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10738 }
10739 return false;
10740 }
10741
10742 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10743 {
10744 if (dump_enabled_p ())
10745 {
10746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10747 "not vectorized: vector stmt in loop:");
10748 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10749 }
10750 return false;
10751 }
10752
10753 tree vectype;
10754 tree scalar_type = NULL_TREE;
10755 if (STMT_VINFO_VECTYPE (stmt_info))
10756 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10757 else
10758 {
10759 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10760 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10761 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10762 else
10763 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10764
10765 /* Pure bool ops don't participate in number-of-units computation.
10766 For comparisons use the types being compared. */
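 /* Illustrative note, not part of the original source: for
    b_1 = x_2 > y_3 the units come from the type of x_2, whereas a
    pure boolean operation such as b_1 = b_2 & b_3 contributes no
    number-of-units vectype and returns early below.  */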
10767 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10768 && is_gimple_assign (stmt)
10769 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10770 {
10771 *stmt_vectype_out = boolean_type_node;
10772
10773 tree rhs1 = gimple_assign_rhs1 (stmt);
10774 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10775 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10776 scalar_type = TREE_TYPE (rhs1);
10777 else
10778 {
10779 if (dump_enabled_p ())
10780 dump_printf_loc (MSG_NOTE, vect_location,
10781 "pure bool operation.\n");
10782 return true;
10783 }
10784 }
10785
10786 if (dump_enabled_p ())
10787 {
10788 dump_printf_loc (MSG_NOTE, vect_location,
10789 "get vectype for scalar type: ");
10790 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10791 dump_printf (MSG_NOTE, "\n");
10792 }
10793 vectype = get_vectype_for_scalar_type (scalar_type);
10794 if (!vectype)
10795 {
10796 if (dump_enabled_p ())
10797 {
10798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10799 "not vectorized: unsupported data-type ");
10800 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10801 scalar_type);
10802 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10803 }
10804 return false;
10805 }
10806
10807 if (!*stmt_vectype_out)
10808 *stmt_vectype_out = vectype;
10809
10810 if (dump_enabled_p ())
10811 {
10812 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10813 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10814 dump_printf (MSG_NOTE, "\n");
10815 }
10816 }
10817
10818 /* Don't try to compute scalar types if the stmt produces a boolean
10819 vector; use the existing vector type instead. */
10820 tree nunits_vectype;
10821 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10822 nunits_vectype = vectype;
10823 else
10824 {
10825 /* The number of units is set according to the smallest scalar
10826 type (or the largest vector size, but we only support one
10827 vector size per vectorization). */
10828 if (*stmt_vectype_out != boolean_type_node)
10829 {
10830 HOST_WIDE_INT dummy;
10831 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10832 }
10833 if (dump_enabled_p ())
10834 {
10835 dump_printf_loc (MSG_NOTE, vect_location,
10836 "get vectype for scalar type: ");
10837 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10838 dump_printf (MSG_NOTE, "\n");
10839 }
10840 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10841 }
10842 if (!nunits_vectype)
10843 {
10844 if (dump_enabled_p ())
10845 {
10846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10847 "not vectorized: unsupported data-type ");
10848 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10849 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10850 }
10851 return false;
10852 }
10853
10854 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10855 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10856 {
10857 if (dump_enabled_p ())
10858 {
10859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10860 "not vectorized: different sized vector "
10861 "types in statement, ");
10862 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10863 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10864 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10865 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10866 }
10867 return false;
10868 }
10869
10870 if (dump_enabled_p ())
10871 {
10872 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10873 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10874 dump_printf (MSG_NOTE, "\n");
10875
10876 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10877 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10878 dump_printf (MSG_NOTE, "\n");
10879 }
10880
10881 *nunits_vectype_out = nunits_vectype;
10882 return true;
10883}
10884
10885/* Try to determine the correct vector type for STMT_INFO, which is a
10886 statement that produces a scalar boolean result. Return the vector
10887 type on success, otherwise return NULL_TREE. */
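/* A hedged illustration of the interface (assumed example, not from the
   sources): for a scalar boolean statement such as

     _Bool x = a[i] < b[i];

   with "int" operands, the returned mask type would be the boolean
   vector type corresponding to the comparison's vector of ints.  */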
10888
10889tree
10890vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10891{
10892 gimple *stmt = stmt_info->stmt;
10893 tree mask_type = NULL;
10894 tree vectype, scalar_type;
10895
10896 if (is_gimple_assign (stmt)
10897 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10898 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10899 {
10900 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10901 mask_type = get_mask_type_for_scalar_type (scalar_type);
10902
10903 if (!mask_type)
10904 {
10905 if (dump_enabled_p ())
10906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10907 "not vectorized: unsupported mask\n");
10908 return NULL_TREE;
10909 }
10910 }
10911 else
10912 {
10913 tree rhs;
10914 ssa_op_iter iter;
10915 enum vect_def_type dt;
10916
10917 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10918 {
894dd753 10919 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10920 {
10921 if (dump_enabled_p ())
10922 {
10923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10924 "not vectorized: can't compute mask type "
10925 "for statement, ");
10926 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10927 0);
10928 }
10929 return NULL_TREE;
10930 }
10931
10932	  /* No vectype probably means an external definition.
10933	     Allow it in case another operand lets us determine
10934	     the mask type.  */
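	  /* Illustrative (hypothetical) case: after if-conversion of
	       _Bool flag = ...;     (defined before the loop)
	       ... flag && a[i] < b[i] ...
	     FLAG is an external definition with no vectype, while the
	     comparison operand can still determine the mask type.  */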
10935 if (!vectype)
10936 continue;
10937
10938 if (!mask_type)
10939 mask_type = vectype;
10940 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10941 TYPE_VECTOR_SUBPARTS (vectype)))
10942 {
10943 if (dump_enabled_p ())
10944 {
10945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10946			     "not vectorized: different sized mask "
10947			     "types in statement, ");
10948 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10949 mask_type);
10950 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10951 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10952 vectype);
10953 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10954 }
10955 return NULL_TREE;
10956 }
10957 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10958 != VECTOR_BOOLEAN_TYPE_P (vectype))
10959 {
10960 if (dump_enabled_p ())
10961 {
10962 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10963 "not vectorized: mixed mask and "
10964 "nonmask vector types in statement, ");
10965 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10966 mask_type);
10967 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10968 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10969 vectype);
10970 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10971 }
10972 return NULL_TREE;
10973 }
10974 }
10975
10976      /* We may compare boolean values loaded as a vector of integers.
10977	 Fix mask_type in that case.  */
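      /* For instance (an assumed example): with
	   _Bool a[N], b[N];
	   ... a[i] == b[i] ...
	 the operands may be loaded using an integer vector type, so the
	 comparison result still needs the corresponding boolean vector
	 type, which build_same_sized_truth_vector_type provides.  */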
10978 if (mask_type
10979 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10980 && gimple_code (stmt) == GIMPLE_ASSIGN
10981 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10982 mask_type = build_same_sized_truth_vector_type (mask_type);
10983 }
10984
10985  /* Having no mask_type should mean the predicate is loop-invariant.
10986     This is probably a subject for optimization in if-conversion.  */
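  /* Hypothetical example of such a case: a condition like
       ... if (cond) a[i] = b[i]; ...
     where COND is defined outside the loop leaves no operand from which
     a mask type could be derived.  */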
10987 if (!mask_type && dump_enabled_p ())
10988 {
10989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10990 "not vectorized: can't compute mask type "
10991 "for statement, ");
10992 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10993 }
10994 return mask_type;
10995}