/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    kind = vector_scatter_store;
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}

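/* Illustrative sketch (added for exposition; not part of the upstream
   source): assuming a hypothetical BODY_COSTS vector and a statement
   whose stmt_info has STMT_VINFO_GATHER_SCATTER_P set, a call such as

     unsigned est = record_stmt_cost (&body_costs, 2, vector_load,
				      stmt_info, 0, vect_body);

   first reclassifies the cost kind as vector_gather_load, then pushes
   it onto BODY_COSTS for later processing, and EST comes back as
   2 * builtin_vectorization_cost (vector_gather_load, vectype, 0).
   With a NULL cost vector the cost would instead go straight to the
   target model via add_stmt_cost.  */
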
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

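/* Illustrative sketch (added for exposition; not part of the upstream
   source): these helpers cooperate when lowering load/store-lanes
   style accesses.  Assuming a hypothetical V4SI vector type V4SI_TYPE
   and a group of two lanes, the generated pattern would look roughly
   like

     tree array = create_vector_array (v4si_type, 2);
     write_vector_array (stmt, gsi, vec0, array, 0);
     write_vector_array (stmt, gsi, vec1, array, 1);
     tree lane0 = read_vector_array (stmt, gsi, scalar_dest, array, 0);

   i.e. a temporary "vect_array" variable indexed with constant
   ARRAY_REFs, each read producing a fresh SSA name.  */
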
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

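/* Illustrative sketch (added for exposition; not part of the upstream
   source): in a hypothetical loop

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;	(has a vdef: *relevant = vect_used_in_scope)
	 s_1 = s_0 + b[i];	(s_1 used after the loop: *live_p = true)
       }
     use (s_1);

   the store is relevant because it alters memory, while the second
   statement is live because its value escapes the loop through a
   loop-closed exit phi.  */
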
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (stmt, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (stmt, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (stmt, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

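/* Illustrative sketch (added for exposition; not part of the upstream
   source): for a hypothetical store

     a[i_5] = x_7;

   the use of i_5 only feeds the ARRAY_REF's index, so
   exist_non_indexing_operands_for_use_p (i_5, stmt) returns false and
   the definition of i_5 need not itself be vectorized, whereas x_7 is
   the stored value (the RHS of the copy) and the function returns
   true for it.  */
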
/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return true;
    }


  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}

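/* Illustrative sketch (added for exposition; not part of the upstream
   source): for the loop in the comment above, only the load
   "T1 = a[T0]" would be seeded into the worklist by
   vect_stmt_relevant_p (assuming its value feeds a relevant
   computation or escapes the loop).  When it is popped, process_use
   runs on T0; because T0 only indexes a[], case 1 of process_use
   leaves "T0 = i + j" unmarked, and transitively the IV updates of i
   and j stay scalar.  */
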
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  /* Cost the "broadcast" of a scalar operand into a vector operand.
     Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
     cost model.  */
  for (i = 0; i < ndts; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

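/* Illustrative sketch (added for exposition; not part of the upstream
   source): for a hypothetical "x + 3" vectorized with NCOPIES = 2,
   where dt[0] is vect_internal_def and dt[1] is vect_constant_def,
   this charges one prologue scalar_to_vec (splatting the constant 3)
   plus two vector_stmt entries in the loop body; with default
   per-stmt costs of 1 the dump would show inside_cost = 2 and
   prologue_cost = 1.  */
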
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  gcc_assert (!PURE_SLP_STMT (stmt_info));

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	    (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

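/* Illustrative sketch (added for exposition; not part of the upstream
   source): for a two-step promotion (PWR = 1), e.g. a hypothetical
   char -> int widening, the loop above charges vect_pow2 (1) = 2
   vec_promote_demote stmts for the first step and vect_pow2 (2) = 4
   for the second, 6 in total; the corresponding two-step demotion
   would charge vect_pow2 (0) + vect_pow2 (1) = 3.  */
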
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       vect_memory_access_type memory_access_type,
		       vec_load_store_type vls_type, slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  if (vls_type == VLS_STORE_INVARIANT)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       vec_to_scalar, stmt_info, 0, vect_body);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

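/* Illustrative sketch (added for exposition; not part of the upstream
   source): a hypothetical interleaved store group of GROUP_SIZE = 4
   with NCOPIES = 1 that is lowered via VMAT_CONTIGUOUS_PERMUTE is
   charged nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm operations on
   top of the stores themselves, reflecting the log2-depth interleave
   network needed to shuffle the scalars into place.  */
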
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      vect_memory_access_type memory_access_type,
		      slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * assumed_nunits,
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (dr, ncopies, first_stmt_p,
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

1112void
1113vect_get_load_cost (struct data_reference *dr, int ncopies,
c3e7ee41 1114 bool add_realign_cost, unsigned int *inside_cost,
92345349
BS
1115 unsigned int *prologue_cost,
1116 stmt_vector_for_cost *prologue_cost_vec,
1117 stmt_vector_for_cost *body_cost_vec,
1118 bool record_prologue_costs)
720f5239
IR
1119{
1120 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
355fe088 1121 gimple *stmt = DR_STMT (dr);
c3e7ee41 1122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
1123
1124 switch (alignment_support_scheme)
ebfd146a
IR
1125 {
1126 case dr_aligned:
1127 {
92345349
BS
1128 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129 stmt_info, 0, vect_body);
ebfd146a 1130
73fbfcad 1131 if (dump_enabled_p ())
78c60e3d 1132 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1133 "vect_model_load_cost: aligned.\n");
ebfd146a
IR
1134
1135 break;
1136 }
1137 case dr_unaligned_supported:
1138 {
720f5239 1139 /* Here, we assign an additional cost for the unaligned load. */
92345349 1140 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
c3e7ee41 1141 unaligned_load, stmt_info,
92345349 1142 DR_MISALIGNMENT (dr), vect_body);
c3e7ee41 1143
73fbfcad 1144 if (dump_enabled_p ())
78c60e3d
SS
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: unaligned supported by "
e645e942 1147 "hardware.\n");
ebfd146a
IR
1148
1149 break;
1150 }
1151 case dr_explicit_realign:
1152 {
92345349
BS
1153 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154 vector_load, stmt_info, 0, vect_body);
1155 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156 vec_perm, stmt_info, 0, vect_body);
ebfd146a
IR
1157
1158 /* FIXME: If the misalignment remains fixed across the iterations of
1159 the containing loop, the following cost should be added to the
92345349 1160 prologue costs. */
ebfd146a 1161 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1162 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163 stmt_info, 0, vect_body);
ebfd146a 1164
73fbfcad 1165 if (dump_enabled_p ())
e645e942
TJ
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "vect_model_load_cost: explicit realign\n");
8bd37302 1168
ebfd146a
IR
1169 break;
1170 }
1171 case dr_explicit_realign_optimized:
1172 {
73fbfcad 1173 if (dump_enabled_p ())
e645e942 1174 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1175 "vect_model_load_cost: unaligned software "
e645e942 1176 "pipelined.\n");
ebfd146a
IR
1177
1178 /* Unaligned software pipeline has a load of an address, an initial
ff802fa1 1179 load, and possibly a mask operation to "prime" the loop. However,
0d0293ac 1180 if this is an access in a group of loads, which provide grouped
ebfd146a 1181 access, then the above cost should only be considered for one
ff802fa1 1182 access in the group. Inside the loop, there is a load op
ebfd146a
IR
1183 and a realignment op. */
1184
92345349 1185 if (add_realign_cost && record_prologue_costs)
ebfd146a 1186 {
92345349
BS
1187 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188 vector_stmt, stmt_info,
1189 0, vect_prologue);
ebfd146a 1190 if (targetm.vectorize.builtin_mask_for_load)
92345349
BS
1191 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192 vector_stmt, stmt_info,
1193 0, vect_prologue);
ebfd146a
IR
1194 }
1195
92345349
BS
1196 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197 stmt_info, 0, vect_body);
1198 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199 stmt_info, 0, vect_body);
8bd37302 1200
73fbfcad 1201 if (dump_enabled_p ())
78c60e3d 1202 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
1203 "vect_model_load_cost: explicit realign optimized"
1204 "\n");
8bd37302 1205
ebfd146a
IR
1206 break;
1207 }
1208
38eec4c6
UW
1209 case dr_unaligned_unsupported:
1210 {
1211 *inside_cost = VECT_MAX_COST;
1212
73fbfcad 1213 if (dump_enabled_p ())
78c60e3d 1214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1215 "vect_model_load_cost: unsupported access.\n");
38eec4c6
UW
1216 break;
1217 }
1218
ebfd146a
IR
1219 default:
1220 gcc_unreachable ();
1221 }
ebfd146a
IR
1222}
1223
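/* Illustrative sketch (added for exposition; not part of the upstream
   source): with NCOPIES = 2 a dr_explicit_realign load is charged
   2 * 2 = 4 vector_load plus 2 vec_perm in the body (plus one
   vector_stmt if the target defines builtin_mask_for_load), whereas
   dr_explicit_realign_optimized moves the address/mask setup into the
   prologue and charges only 2 vector_load and 2 vec_perm inside the
   loop.  */
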
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with initial
     'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

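/* Illustrative sketch (added for exposition; not part of the upstream
   source): assuming a hypothetical V4SI vector type V4SI_TYPE, a call
   like

     tree def = vect_init_vector (stmt, build_int_cst (intSI_type_node, 5),
				  v4si_type, NULL);

   first builds the splat constant { 5, 5, 5, 5 } via
   build_vector_from_val, then (since GSI is NULL) emits
   "cst_N = { 5, 5, 5, 5 };" on the loop preheader edge and returns
   the new SSA name cst_N.  */
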
c83a894c 1336/* Function vect_get_vec_def_for_operand_1.
a70d6342 1337
c83a894c
AH
1338 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339 DT that will be used in the vectorized stmt. */
ebfd146a
IR
1340
1341tree
c83a894c 1342vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
ebfd146a
IR
1343{
1344 tree vec_oprnd;
355fe088 1345 gimple *vec_stmt;
ebfd146a 1346 stmt_vec_info def_stmt_info = NULL;
ebfd146a
IR
1347
1348 switch (dt)
1349 {
81c40241 1350 /* operand is a constant or a loop invariant. */
ebfd146a 1351 case vect_constant_def:
81c40241 1352 case vect_external_def:
c83a894c
AH
1353 /* Code should use vect_get_vec_def_for_operand. */
1354 gcc_unreachable ();
ebfd146a 1355
81c40241 1356 /* operand is defined inside the loop. */
8644a673 1357 case vect_internal_def:
ebfd146a 1358 {
ebfd146a
IR
1359 /* Get the def from the vectorized stmt. */
1360 def_stmt_info = vinfo_for_stmt (def_stmt);
83197f37 1361
ebfd146a 1362 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
83197f37
IR
1363 /* Get vectorized pattern statement. */
1364 if (!vec_stmt
1365 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366 && !STMT_VINFO_RELEVANT (def_stmt_info))
1367 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368 STMT_VINFO_RELATED_STMT (def_stmt_info)));
ebfd146a
IR
1369 gcc_assert (vec_stmt);
1370 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371 vec_oprnd = PHI_RESULT (vec_stmt);
1372 else if (is_gimple_call (vec_stmt))
1373 vec_oprnd = gimple_call_lhs (vec_stmt);
1374 else
1375 vec_oprnd = gimple_assign_lhs (vec_stmt);
1376 return vec_oprnd;
1377 }
1378
c78e3652 1379 /* operand is defined by a loop header phi. */
ebfd146a 1380 case vect_reduction_def:
06066f92 1381 case vect_double_reduction_def:
7c5222ff 1382 case vect_nested_cycle:
ebfd146a
IR
1383 case vect_induction_def:
1384 {
1385 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386
1387 /* Get the def from the vectorized stmt. */
1388 def_stmt_info = vinfo_for_stmt (def_stmt);
1389 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
6dbbece6
RG
1390 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391 vec_oprnd = PHI_RESULT (vec_stmt);
1392 else
1393 vec_oprnd = gimple_get_lhs (vec_stmt);
ebfd146a
IR
1394 return vec_oprnd;
1395 }
1396
1397 default:
1398 gcc_unreachable ();
1399 }
1400}
1401
1402
c83a894c
AH
1403/* Function vect_get_vec_def_for_operand.
1404
1405 OP is an operand in STMT. This function returns a (vector) def that will be
1406 used in the vectorized stmt for STMT.
1407
1408 In the case that OP is an SSA_NAME which is defined in the loop, then
1409 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410
1411 In case OP is an invariant or constant, a new stmt that creates a vector def
1412 needs to be introduced. VECTYPE may be used to specify a required type for
1413 vector invariant. */
1414
1415tree
1416vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417{
1418 gimple *def_stmt;
1419 enum vect_def_type dt;
1420 bool is_simple_use;
1421 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423
1424 if (dump_enabled_p ())
1425 {
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "vect_get_vec_def_for_operand: ");
1428 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429 dump_printf (MSG_NOTE, "\n");
1430 }
1431
1432 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433 gcc_assert (is_simple_use);
1434 if (def_stmt && dump_enabled_p ())
1435 {
1436 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1437 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438 }
1439
1440 if (dt == vect_constant_def || dt == vect_external_def)
1441 {
1442 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443 tree vector_type;
1444
1445 if (vectype)
1446 vector_type = vectype;
2568d8a1 1447 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c83a894c
AH
1448 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450 else
1451 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452
1453 gcc_assert (vector_type);
1454 return vect_init_vector (stmt, op, vector_type, NULL);
1455 }
1456 else
1457 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1458}
1459
1460
ebfd146a
IR
1461/* Function vect_get_vec_def_for_stmt_copy
1462
ff802fa1 1463 Return a vector-def for an operand. This function is used when the
b8698a0f
L
1464 vectorized stmt to be created (by the caller to this function) is a "copy"
1465 created in case the vectorized result cannot fit in one vector, and several
ff802fa1 1466 copies of the vector-stmt are required. In this case the vector-def is
ebfd146a 1467 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
b8698a0f 1468 of the stmt that defines VEC_OPRND.
ebfd146a
IR
1469 DT is the type of the vector def VEC_OPRND.
1470
1471 Context:
1472 In case the vectorization factor (VF) is bigger than the number
1473 of elements that can fit in a vectype (nunits), we have to generate
ff802fa1 1474 more than one vector stmt to vectorize the scalar stmt. This situation
b8698a0f 1475 arises when there are multiple data-types operated upon in the loop; the
ebfd146a
IR
1476 smallest data-type determines the VF, and as a result, when vectorizing
1477 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478 vector stmt (each computing a vector of 'nunits' results, and together
b8698a0f 1479 computing 'VF' results in each iteration). This function is called when
ebfd146a
IR
1480 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481 which VF=16 and nunits=4, so the number of copies required is 4):
1482
1483 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
b8698a0f 1484
ebfd146a
IR
1485 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1486 VS1.1: vx.1 = memref1 VS1.2
1487 VS1.2: vx.2 = memref2 VS1.3
b8698a0f 1488 VS1.3: vx.3 = memref3
ebfd146a
IR
1489
1490 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1491 VSnew.1: vz1 = vx.1 + ... VSnew.2
1492 VSnew.2: vz2 = vx.2 + ... VSnew.3
1493 VSnew.3: vz3 = vx.3 + ...
1494
1495 The vectorization of S1 is explained in vectorizable_load.
1496 The vectorization of S2:
b8698a0f
L
1497 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498 the function 'vect_get_vec_def_for_operand' is called to
ff802fa1 1499 get the relevant vector-def for each operand of S2. For operand x it
ebfd146a
IR
1500 returns the vector-def 'vx.0'.
1501
b8698a0f
L
1502 To create the remaining copies of the vector-stmt (VSnew.j), this
1503 function is called to get the relevant vector-def for each operand. It is
1504 obtained from the respective VS1.j stmt, which is recorded in the
ebfd146a
IR
1505 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506
b8698a0f
L
1507 For example, to obtain the vector-def 'vx.1' in order to create the
1508 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
ebfd146a
IR
1510 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511 and return its def ('vx.1').
1512 Overall, to create the above sequence this function will be called 3 times:
1513 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1516
1517tree
1518vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519{
355fe088 1520 gimple *vec_stmt_for_operand;
ebfd146a
IR
1521 stmt_vec_info def_stmt_info;
1522
1523 /* Do nothing; can reuse same def. */
8644a673 1524 if (dt == vect_external_def || dt == vect_constant_def)
ebfd146a
IR
1525 return vec_oprnd;
1526
1527 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529 gcc_assert (def_stmt_info);
1530 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531 gcc_assert (vec_stmt_for_operand);
ebfd146a
IR
1532 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534 else
1535 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536 return vec_oprnd;
1537}
1538
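
/* Illustrative caller sketch (the names below are ours, not from this
   file): the first copy's def comes from vect_get_vec_def_for_operand
   and each later copy advances along the RELATED_STMT chain:

     tree vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
     for (int j = 1; j < ncopies; j++)
       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);  */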
1539
1540/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1541 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a 1542
c78e3652 1543void
b8698a0f 1544vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1545 vec<tree> *vec_oprnds0,
1546 vec<tree> *vec_oprnds1)
ebfd146a 1547{
9771b263 1548 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1549
1550 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1551 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1552
9771b263 1553 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1554 {
9771b263 1555 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1556 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1557 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1558 }
1559}
1560
1561
c78e3652 1562/* Get vectorized definitions for OP0 and OP1. */
ebfd146a 1563
c78e3652 1564void
355fe088 1565vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
9771b263
DN
1566 vec<tree> *vec_oprnds0,
1567 vec<tree> *vec_oprnds1,
306b0c92 1568 slp_tree slp_node)
ebfd146a
IR
1569{
1570 if (slp_node)
d092494c
IR
1571 {
1572 int nops = (op1 == NULL_TREE) ? 1 : 2;
ef062b13
TS
1573 auto_vec<tree> ops (nops);
1574 auto_vec<vec<tree> > vec_defs (nops);
d092494c 1575
9771b263 1576 ops.quick_push (op0);
d092494c 1577 if (op1)
9771b263 1578 ops.quick_push (op1);
d092494c 1579
306b0c92 1580 vect_get_slp_defs (ops, slp_node, &vec_defs);
d092494c 1581
37b5ec8f 1582 *vec_oprnds0 = vec_defs[0];
d092494c 1583 if (op1)
37b5ec8f 1584 *vec_oprnds1 = vec_defs[1];
d092494c 1585 }
ebfd146a
IR
1586 else
1587 {
1588 tree vec_oprnd;
1589
9771b263 1590 vec_oprnds0->create (1);
81c40241 1591 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
9771b263 1592 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a
IR
1593
1594 if (op1)
1595 {
9771b263 1596 vec_oprnds1->create (1);
81c40241 1597 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
9771b263 1598 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1599 }
1600 }
1601}
1602
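
/* Illustrative two-phase pattern as used by callers such as
   vectorizable_bswap further down, where J is the copy index:

     if (j == 0)
       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
			  slp_node);
     else
       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);  */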
bb6c2b68
RS
1603/* Helper function called by vect_finish_replace_stmt and
1604 vect_finish_stmt_generation. Set the location of the new
1605 statement and create a stmt_vec_info for it. */
1606
1607static void
1608vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1609{
1610 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1611 vec_info *vinfo = stmt_info->vinfo;
1612
1613 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614
1615 if (dump_enabled_p ())
1616 {
1617 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1619 }
1620
1621 gimple_set_location (vec_stmt, gimple_location (stmt));
1622
1623 /* While EH edges will generally prevent vectorization, stmt might
1624 e.g. be in a must-not-throw region. Ensure newly created stmts
1625 that could throw are part of the same region. */
1626 int lp_nr = lookup_stmt_eh_lp (stmt);
1627 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1629}
1630
1631/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1632 which sets the same scalar result as STMT did. */
1633
1634void
1635vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1636{
1637 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1638
1639 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640 gsi_replace (&gsi, vec_stmt, false);
1641
1642 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1643}
ebfd146a
IR
1644
1645/* Function vect_finish_stmt_generation.
1646
1647 Insert a new stmt. */
1648
1649void
355fe088 1650vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
ebfd146a
IR
1651 gimple_stmt_iterator *gsi)
1652{
ebfd146a
IR
1653 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654
54e8e2c3
RG
1655 if (!gsi_end_p (*gsi)
1656 && gimple_has_mem_ops (vec_stmt))
1657 {
355fe088 1658 gimple *at_stmt = gsi_stmt (*gsi);
54e8e2c3
RG
1659 tree vuse = gimple_vuse (at_stmt);
1660 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661 {
1662 tree vdef = gimple_vdef (at_stmt);
1663 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664 /* If we have an SSA vuse and insert a store, update virtual
1665 SSA form to avoid triggering the renamer. Do so only
1666 if we can easily see all uses - which is what almost always
1667 happens with the way vectorized stmts are inserted. */
1668 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669 && ((is_gimple_assign (vec_stmt)
1670 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671 || (is_gimple_call (vec_stmt)
1672 && !(gimple_call_flags (vec_stmt)
1673 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674 {
1675 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676 gimple_set_vdef (vec_stmt, new_vdef);
1677 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678 }
1679 }
1680 }
ebfd146a 1681 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
bb6c2b68 1682 vect_finish_stmt_generation_1 (stmt, vec_stmt);
ebfd146a
IR
1683}
1684
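/* Illustrative caller sketch (SSA names are ours): build the vector
   stmt, then let the function above give it STMT's location,
   stmt_vec_info and EH region:

     tree new_temp = make_ssa_name (vec_dest);
     gimple *new_stmt = gimple_build_assign (new_temp, PLUS_EXPR,
					     vop0, vop1);
     vect_finish_stmt_generation (stmt, new_stmt, gsi);  */
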
70439f0d
RS
1685/* We want to vectorize a call to combined function CFN with function
1686 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1687 as the types of all inputs. Check whether this is possible using
1688 an internal function, returning its code if so or IFN_LAST if not. */
ebfd146a 1689
70439f0d
RS
1690static internal_fn
1691vectorizable_internal_function (combined_fn cfn, tree fndecl,
1692 tree vectype_out, tree vectype_in)
ebfd146a 1693{
70439f0d
RS
1694 internal_fn ifn;
1695 if (internal_fn_p (cfn))
1696 ifn = as_internal_fn (cfn);
1697 else
1698 ifn = associated_internal_fn (fndecl);
1699 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1700 {
1701 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1702 if (info.vectorizable)
1703 {
1704 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1705 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
d95ab70a
RS
1706 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1707 OPTIMIZE_FOR_SPEED))
70439f0d
RS
1708 return ifn;
1709 }
1710 }
1711 return IFN_LAST;
ebfd146a
IR
1712}
1713
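/* Illustrative example (not from this file): for a scalar call
   "y = sqrtf (x)" with V4SF input and output types, CFN_SQRT maps to
   IFN_SQRT, and the function above returns IFN_SQRT if and only if
   direct_internal_fn_supported_p reports that the target implements
   the corresponding optab for V4SFmode; otherwise it returns
   IFN_LAST.  */
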
5ce9450f 1714
355fe088 1715static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1716 gimple_stmt_iterator *);
1717
7cfb4d93
RS
1718/* Check whether a load or store statement in the loop described by
1719 LOOP_VINFO is possible in a fully-masked loop. This is testing
1720 whether the vectorizer pass has the appropriate support, as well as
1721 whether the target does.
1722
1723 VLS_TYPE says whether the statement is a load or store and VECTYPE
1724 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1725 says how the load or store is going to be implemented and GROUP_SIZE
1726 is the number of load or store statements in the containing group.
bfaa08b7
RS
1727 If the access is a gather load or scatter store, GS_INFO describes
1728 its arguments.
7cfb4d93
RS
1729
1730 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1731 supported, otherwise record the required mask types. */
1732
1733static void
1734check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1735 vec_load_store_type vls_type, int group_size,
bfaa08b7
RS
1736 vect_memory_access_type memory_access_type,
1737 gather_scatter_info *gs_info)
7cfb4d93
RS
1738{
1739 /* Invariant loads need no special support. */
1740 if (memory_access_type == VMAT_INVARIANT)
1741 return;
1742
1743 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744 machine_mode vecmode = TYPE_MODE (vectype);
1745 bool is_load = (vls_type == VLS_LOAD);
1746 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1747 {
1748 if (is_load
1749 ? !vect_load_lanes_supported (vectype, group_size, true)
1750 : !vect_store_lanes_supported (vectype, group_size, true))
1751 {
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754 "can't use a fully-masked loop because the"
1755 " target doesn't have an appropriate masked"
1756 " load/store-lanes instruction.\n");
1757 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758 return;
1759 }
1760 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762 return;
1763 }
1764
bfaa08b7
RS
1765 if (memory_access_type == VMAT_GATHER_SCATTER)
1766 {
f307441a
RS
1767 internal_fn ifn = (is_load
1768 ? IFN_MASK_GATHER_LOAD
1769 : IFN_MASK_SCATTER_STORE);
bfaa08b7 1770 tree offset_type = TREE_TYPE (gs_info->offset);
f307441a 1771 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
bfaa08b7
RS
1772 gs_info->memory_type,
1773 TYPE_SIGN (offset_type),
1774 gs_info->scale))
1775 {
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 "can't use a fully-masked loop because the"
1779 " target doesn't have an appropriate masked"
f307441a 1780 " gather load or scatter store instruction.\n");
bfaa08b7
RS
1781 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1782 return;
1783 }
1784 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1785 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1786 return;
1787 }
1788
7cfb4d93
RS
1789 if (memory_access_type != VMAT_CONTIGUOUS
1790 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1791 {
1792 /* Element X of the data must come from iteration i * VF + X of the
1793 scalar loop. We need more work to support other mappings. */
1794 if (dump_enabled_p ())
1795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796 "can't use a fully-masked loop because an access"
1797 " isn't contiguous.\n");
1798 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1799 return;
1800 }
1801
1802 machine_mode mask_mode;
1803 if (!(targetm.vectorize.get_mask_mode
1804 (GET_MODE_NUNITS (vecmode),
1805 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1806 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1807 {
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "can't use a fully-masked loop because the target"
1811 " doesn't have the appropriate masked load or"
1812 " store.\n");
1813 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814 return;
1815 }
1816 /* We might load more scalars than we need for permuting SLP loads.
1817 We checked in get_group_load_store_type that the extra elements
1818 don't leak into a new vector. */
1819 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1820 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1821 unsigned int nvectors;
1822 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1823 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1824 else
1825 gcc_unreachable ();
1826}
1827
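/* Worked example (illustrative): for a contiguous access with
   GROUP_SIZE == 2, VF == 8 and 4-element vectors, the division above
   gives NVECTORS == (2 * 8) / 4 == 4 masks to record.  */
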
1828/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1829 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1830 that needs to be applied to all loads and stores in a vectorized loop.
1831 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1832
1833 MASK_TYPE is the type of both masks. If new statements are needed,
1834 insert them before GSI. */
1835
1836static tree
1837prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1838 gimple_stmt_iterator *gsi)
1839{
1840 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1841 if (!loop_mask)
1842 return vec_mask;
1843
1844 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1845 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1846 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1847 vec_mask, loop_mask);
1848 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1849 return and_res;
1850}
1851
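/* Illustrative GIMPLE for the nonnull LOOP_MASK case (the SSA names
   are ours):

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;  */
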
429ef523
RS
1852/* Determine whether we can use a gather load or scatter store to vectorize
1853 strided load or store STMT by truncating the current offset to a smaller
1854 width. We need to be able to construct an offset vector:
1855
1856 { 0, X, X*2, X*3, ... }
1857
1858 without loss of precision, where X is STMT's DR_STEP.
1859
1860 Return true if this is possible, describing the gather load or scatter
1861 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1862
1863static bool
1864vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1865 bool masked_p,
1866 gather_scatter_info *gs_info)
1867{
1868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1869 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1870 tree step = DR_STEP (dr);
1871 if (TREE_CODE (step) != INTEGER_CST)
1872 {
1873 /* ??? Perhaps we could use range information here? */
1874 if (dump_enabled_p ())
1875 dump_printf_loc (MSG_NOTE, vect_location,
1876 "cannot truncate variable step.\n");
1877 return false;
1878 }
1879
1880 /* Get the number of bits in an element. */
1881 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1883 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1884
1885 /* Set COUNT to the upper limit on the number of elements - 1.
1886 Start with the maximum vectorization factor. */
1887 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1888
1889 /* Try lowering COUNT to the number of scalar latch iterations. */
1890 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1891 widest_int max_iters;
1892 if (max_loop_iterations (loop, &max_iters)
1893 && max_iters < count)
1894 count = max_iters.to_shwi ();
1895
1896 /* Try scales of 1 and the element size. */
1897 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1898 bool overflow_p = false;
1899 for (int i = 0; i < 2; ++i)
1900 {
1901 int scale = scales[i];
1902 widest_int factor;
1903 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1904 continue;
1905
1906 /* See whether we can calculate COUNT * STEP / SCALE
1907 in ELEMENT_BITS bits. */
1908 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1909 if (overflow_p)
1910 continue;
1911 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1912 if (wi::min_precision (range, sign) > element_bits)
1913 {
1914 overflow_p = true;
1915 continue;
1916 }
1917
1918 /* See whether the target supports the operation. */
1919 tree memory_type = TREE_TYPE (DR_REF (dr));
1920 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1921 memory_type, element_bits, sign, scale,
1922 &gs_info->ifn, &gs_info->element_type))
1923 continue;
1924
1925 tree offset_type = build_nonstandard_integer_type (element_bits,
1926 sign == UNSIGNED);
1927
1928 gs_info->decl = NULL_TREE;
1929 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1930 but we don't need to store that here. */
1931 gs_info->base = NULL_TREE;
1932 gs_info->offset = fold_convert (offset_type, step);
929b4411 1933 gs_info->offset_dt = vect_constant_def;
429ef523
RS
1934 gs_info->offset_vectype = NULL_TREE;
1935 gs_info->scale = scale;
1936 gs_info->memory_type = memory_type;
1937 return true;
1938 }
1939
1940 if (overflow_p && dump_enabled_p ())
1941 dump_printf_loc (MSG_NOTE, vect_location,
1942 "truncating gather/scatter offset to %d bits"
1943 " might change its value.\n", element_bits);
1944
1945 return false;
1946}
1947
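/* Worked example (illustrative): for 32-bit elements and
   DR_STEP == 20 bytes, SCALE == 4 divides the step exactly, giving
   FACTOR == 5 and the offset vector { 0, 5, 10, ... }; the approach
   is valid as long as COUNT * 5 still fits in the 32 element bits.  */
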
ab2fc782
RS
1948/* Return true if we can use gather/scatter internal functions to
1949 vectorize STMT, which is a grouped or strided load or store.
429ef523
RS
1950 MASKED_P is true if load or store is conditional. When returning
1951 true, fill in GS_INFO with the information required to perform the
1952 operation. */
ab2fc782
RS
1953
1954static bool
1955vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
429ef523 1956 bool masked_p,
ab2fc782
RS
1957 gather_scatter_info *gs_info)
1958{
1959 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1960 || gs_info->decl)
429ef523
RS
1961 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1962 masked_p, gs_info);
ab2fc782
RS
1963
1964 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1966 tree offset_type = TREE_TYPE (gs_info->offset);
1967 unsigned int offset_bits = TYPE_PRECISION (offset_type);
1968
1969 /* Enforced by vect_check_gather_scatter. */
1970 gcc_assert (element_bits >= offset_bits);
1971
1972 /* If the elements are wider than the offset, convert the offset to the
1973 same width, without changing its sign. */
1974 if (element_bits > offset_bits)
1975 {
1976 bool unsigned_p = TYPE_UNSIGNED (offset_type);
1977 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1978 gs_info->offset = fold_convert (offset_type, gs_info->offset);
1979 }
1980
1981 if (dump_enabled_p ())
1982 dump_printf_loc (MSG_NOTE, vect_location,
1983 "using gather/scatter for strided/grouped access,"
1984 " scale = %d\n", gs_info->scale);
1985
1986 return true;
1987}
1988
62da9e14
RS
1989/* STMT is a non-strided load or store, meaning that it accesses
1990 elements with a known constant step. Return -1 if that step
1991 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1992
1993static int
1994compare_step_with_zero (gimple *stmt)
1995{
1996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3f5e8a76
RS
1997 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1999 size_zero_node);
62da9e14
RS
2000}
2001
2002/* If the target supports a permute mask that reverses the elements in
2003 a vector of type VECTYPE, return that mask, otherwise return null. */
2004
2005static tree
2006perm_mask_for_reverse (tree vectype)
2007{
928686b1 2008 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
62da9e14 2009
d980067b
RS
2010 /* The encoding has a single stepped pattern. */
2011 vec_perm_builder sel (nunits, 1, 3);
928686b1 2012 for (int i = 0; i < 3; ++i)
908a1a16 2013 sel.quick_push (nunits - 1 - i);
62da9e14 2014
e3342de4
RS
2015 vec_perm_indices indices (sel, 1, nunits);
2016 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
62da9e14 2017 return NULL_TREE;
e3342de4 2018 return vect_gen_perm_mask_checked (vectype, indices);
62da9e14 2019}
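
/* For example (illustrative): for V4SI the three pushed elements
   { 3, 2, 1 } encode the single stepped pattern that extends to
   { 3, 2, 1, 0 }, and the same encoding also describes the reversal
   of variable-length vectors.  */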
5ce9450f 2020
c3a8f964
RS
2021/* STMT is either a masked or unconditional store. Return the value
2022 being stored. */
2023
f307441a 2024tree
c3a8f964
RS
2025vect_get_store_rhs (gimple *stmt)
2026{
2027 if (gassign *assign = dyn_cast <gassign *> (stmt))
2028 {
2029 gcc_assert (gimple_assign_single_p (assign));
2030 return gimple_assign_rhs1 (assign);
2031 }
2032 if (gcall *call = dyn_cast <gcall *> (stmt))
2033 {
2034 internal_fn ifn = gimple_call_internal_fn (call);
f307441a
RS
2035 int index = internal_fn_stored_value_index (ifn);
2036 gcc_assert (index >= 0);
2037 return gimple_call_arg (stmt, index);
c3a8f964
RS
2038 }
2039 gcc_unreachable ();
2040}
2041
2de001ee
RS
2042/* A subroutine of get_load_store_type, with a subset of the same
2043 arguments. Handle the case where STMT is part of a grouped load
2044 or store.
2045
2046 For stores, the statements in the group are all consecutive
2047 and there is no gap at the end. For loads, the statements in the
2048 group might not be consecutive; there can be gaps between statements
2049 as well as at the end. */
2050
2051static bool
2052get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
7e11fc7f 2053 bool masked_p, vec_load_store_type vls_type,
429ef523
RS
2054 vect_memory_access_type *memory_access_type,
2055 gather_scatter_info *gs_info)
2de001ee
RS
2056{
2057 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058 vec_info *vinfo = stmt_info->vinfo;
2059 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2060 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2061 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
f702e7d4 2062 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2de001ee
RS
2063 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2064 bool single_element_p = (stmt == first_stmt
2065 && !GROUP_NEXT_ELEMENT (stmt_info));
2066 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
928686b1 2067 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2068
2069 /* True if the vectorized statements would access beyond the last
2070 statement in the group. */
2071 bool overrun_p = false;
2072
2073 /* True if we can cope with such overrun by peeling for gaps, so that
2074 there is at least one final scalar iteration after the vector loop. */
7e11fc7f
RS
2075 bool can_overrun_p = (!masked_p
2076 && vls_type == VLS_LOAD
2077 && loop_vinfo
2078 && !loop->inner);
2de001ee
RS
2079
2080 /* There can only be a gap at the end of the group if the stride is
2081 known at compile time. */
2082 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2083
2084 /* Stores can't yet have gaps. */
2085 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2086
2087 if (slp)
2088 {
2089 if (STMT_VINFO_STRIDED_P (stmt_info))
2090 {
2091 /* Try to use consecutive accesses of GROUP_SIZE elements,
2092 separated by the stride, until we have a complete vector.
2093 Fall back to scalar accesses if that isn't possible. */
928686b1 2094 if (multiple_p (nunits, group_size))
2de001ee
RS
2095 *memory_access_type = VMAT_STRIDED_SLP;
2096 else
2097 *memory_access_type = VMAT_ELEMENTWISE;
2098 }
2099 else
2100 {
2101 overrun_p = loop_vinfo && gap != 0;
2102 if (overrun_p && vls_type != VLS_LOAD)
2103 {
2104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2105 "Grouped store with gaps requires"
2106 " non-consecutive accesses\n");
2107 return false;
2108 }
f702e7d4
RS
2109 /* An overrun is fine if the trailing elements are smaller
2110 than the alignment boundary B. Every vector access will
2111 be a multiple of B and so we are guaranteed to access a
2112 non-gap element in the same B-sized block. */
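 /* For example (illustrative): with a known alignment of 16 bytes
 and 4-byte elements, B covers four elements, so a trailing gap
 of up to three elements never spills into a separate block. */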
f9ef2c76 2113 if (overrun_p
f702e7d4
RS
2114 && gap < (vect_known_alignment_in_bytes (first_dr)
2115 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2116 overrun_p = false;
2de001ee
RS
2117 if (overrun_p && !can_overrun_p)
2118 {
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121 "Peeling for outer loop is not supported\n");
2122 return false;
2123 }
2124 *memory_access_type = VMAT_CONTIGUOUS;
2125 }
2126 }
2127 else
2128 {
2129 /* We can always handle this case using elementwise accesses,
2130 but see if something more efficient is available. */
2131 *memory_access_type = VMAT_ELEMENTWISE;
2132
2133 /* If there is a gap at the end of the group then these optimizations
2134 would access excess elements in the last iteration. */
2135 bool would_overrun_p = (gap != 0);
f702e7d4
RS
2136 /* An overrun is fine if the trailing elements are smaller than the
2137 alignment boundary B. Every vector access will be a multiple of B
2138 and so we are guaranteed to access a non-gap element in the
2139 same B-sized block. */
f9ef2c76 2140 if (would_overrun_p
7e11fc7f 2141 && !masked_p
f702e7d4
RS
2142 && gap < (vect_known_alignment_in_bytes (first_dr)
2143 / vect_get_scalar_dr_size (first_dr)))
f9ef2c76 2144 would_overrun_p = false;
f702e7d4 2145
2de001ee 2146 if (!STMT_VINFO_STRIDED_P (stmt_info)
62da9e14
RS
2147 && (can_overrun_p || !would_overrun_p)
2148 && compare_step_with_zero (stmt) > 0)
2de001ee 2149 {
6737facb
RS
2150 /* First cope with the degenerate case of a single-element
2151 vector. */
2152 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2153 *memory_access_type = VMAT_CONTIGUOUS;
2154
2155 /* Otherwise try using LOAD/STORE_LANES. */
2156 if (*memory_access_type == VMAT_ELEMENTWISE
2157 && (vls_type == VLS_LOAD
7e11fc7f
RS
2158 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2159 : vect_store_lanes_supported (vectype, group_size,
2160 masked_p)))
2de001ee
RS
2161 {
2162 *memory_access_type = VMAT_LOAD_STORE_LANES;
2163 overrun_p = would_overrun_p;
2164 }
2165
2166 /* If that fails, try using permuting loads. */
2167 if (*memory_access_type == VMAT_ELEMENTWISE
2168 && (vls_type == VLS_LOAD
2169 ? vect_grouped_load_supported (vectype, single_element_p,
2170 group_size)
2171 : vect_grouped_store_supported (vectype, group_size)))
2172 {
2173 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2174 overrun_p = would_overrun_p;
2175 }
2176 }
429ef523
RS
2177
2178 /* As a last resort, try using a gather load or scatter store.
2179
2180 ??? Although the code can handle all group sizes correctly,
2181 it probably isn't a win to use separate strided accesses based
2182 on nearby locations. Or, even if it's a win over scalar code,
2183 it might not be a win over vectorizing at a lower VF, if that
2184 allows us to use contiguous accesses. */
2185 if (*memory_access_type == VMAT_ELEMENTWISE
2186 && single_element_p
2187 && loop_vinfo
2188 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2189 masked_p, gs_info))
2190 *memory_access_type = VMAT_GATHER_SCATTER;
2de001ee
RS
2191 }
2192
2193 if (vls_type != VLS_LOAD && first_stmt == stmt)
2194 {
2195 /* STMT is the leader of the group. Check the operands of all the
2196 stmts of the group. */
2197 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2198 while (next_stmt)
2199 {
7e11fc7f 2200 tree op = vect_get_store_rhs (next_stmt);
2de001ee
RS
2201 gimple *def_stmt;
2202 enum vect_def_type dt;
2203 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2204 {
2205 if (dump_enabled_p ())
2206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2207 "use not simple.\n");
2208 return false;
2209 }
2210 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2211 }
2212 }
2213
2214 if (overrun_p)
2215 {
2216 gcc_assert (can_overrun_p);
2217 if (dump_enabled_p ())
2218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2219 "Data access with gaps requires scalar "
2220 "epilogue loop\n");
2221 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2222 }
2223
2224 return true;
2225}
2226
62da9e14
RS
2227/* A subroutine of get_load_store_type, with a subset of the same
2228 arguments. Handle the case where STMT is a load or store that
2229 accesses consecutive elements with a negative step. */
2230
2231static vect_memory_access_type
2232get_negative_load_store_type (gimple *stmt, tree vectype,
2233 vec_load_store_type vls_type,
2234 unsigned int ncopies)
2235{
2236 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2237 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2238 dr_alignment_support alignment_support_scheme;
2239
2240 if (ncopies > 1)
2241 {
2242 if (dump_enabled_p ())
2243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244 "multiple types with negative step.\n");
2245 return VMAT_ELEMENTWISE;
2246 }
2247
2248 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2249 if (alignment_support_scheme != dr_aligned
2250 && alignment_support_scheme != dr_unaligned_supported)
2251 {
2252 if (dump_enabled_p ())
2253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254 "negative step but alignment required.\n");
2255 return VMAT_ELEMENTWISE;
2256 }
2257
2258 if (vls_type == VLS_STORE_INVARIANT)
2259 {
2260 if (dump_enabled_p ())
2261 dump_printf_loc (MSG_NOTE, vect_location,
2262 "negative step with invariant source;"
2263 " no permute needed.\n");
2264 return VMAT_CONTIGUOUS_DOWN;
2265 }
2266
2267 if (!perm_mask_for_reverse (vectype))
2268 {
2269 if (dump_enabled_p ())
2270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2271 "negative step and reversing not supported.\n");
2272 return VMAT_ELEMENTWISE;
2273 }
2274
2275 return VMAT_CONTIGUOUS_REVERSE;
2276}
2277
2de001ee
RS
2278/* Analyze load or store statement STMT of type VLS_TYPE. Return true
2279 if there is a memory access type that the vectorized form can use,
2280 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2281 or scatters, fill in GS_INFO accordingly.
2282
2283 SLP says whether we're performing SLP rather than loop vectorization.
7e11fc7f 2284 MASKED_P is true if the statement is conditional on a vectorized mask.
62da9e14
RS
2285 VECTYPE is the vector type that the vectorized statements will use.
2286 NCOPIES is the number of vector statements that will be needed. */
2de001ee
RS
2287
2288static bool
7e11fc7f 2289get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
62da9e14 2290 vec_load_store_type vls_type, unsigned int ncopies,
2de001ee
RS
2291 vect_memory_access_type *memory_access_type,
2292 gather_scatter_info *gs_info)
2293{
2294 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2295 vec_info *vinfo = stmt_info->vinfo;
2296 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4d694b27 2297 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2de001ee
RS
2298 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2299 {
2300 *memory_access_type = VMAT_GATHER_SCATTER;
2301 gimple *def_stmt;
2302 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2303 gcc_unreachable ();
2304 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2305 &gs_info->offset_dt,
2306 &gs_info->offset_vectype))
2307 {
2308 if (dump_enabled_p ())
2309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2310 "%s index use not simple.\n",
2311 vls_type == VLS_LOAD ? "gather" : "scatter");
2312 return false;
2313 }
2314 }
2315 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2316 {
7e11fc7f 2317 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
429ef523 2318 memory_access_type, gs_info))
2de001ee
RS
2319 return false;
2320 }
2321 else if (STMT_VINFO_STRIDED_P (stmt_info))
2322 {
2323 gcc_assert (!slp);
ab2fc782 2324 if (loop_vinfo
429ef523
RS
2325 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2326 masked_p, gs_info))
ab2fc782
RS
2327 *memory_access_type = VMAT_GATHER_SCATTER;
2328 else
2329 *memory_access_type = VMAT_ELEMENTWISE;
2de001ee
RS
2330 }
2331 else
62da9e14
RS
2332 {
2333 int cmp = compare_step_with_zero (stmt);
2334 if (cmp < 0)
2335 *memory_access_type = get_negative_load_store_type
2336 (stmt, vectype, vls_type, ncopies);
2337 else if (cmp == 0)
2338 {
2339 gcc_assert (vls_type == VLS_LOAD);
2340 *memory_access_type = VMAT_INVARIANT;
2341 }
2342 else
2343 *memory_access_type = VMAT_CONTIGUOUS;
2344 }
2de001ee 2345
4d694b27
RS
2346 if ((*memory_access_type == VMAT_ELEMENTWISE
2347 || *memory_access_type == VMAT_STRIDED_SLP)
2348 && !nunits.is_constant ())
2349 {
2350 if (dump_enabled_p ())
2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 "Not using elementwise accesses due to variable "
2353 "vectorization factor.\n");
2354 return false;
2355 }
2356
2de001ee
RS
2357 /* FIXME: At the moment the cost model seems to underestimate the
2358 cost of using elementwise accesses. This check preserves the
2359 traditional behavior until that can be fixed. */
2360 if (*memory_access_type == VMAT_ELEMENTWISE
4aa157e8
RS
2361 && !STMT_VINFO_STRIDED_P (stmt_info)
2362 && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2363 && !GROUP_NEXT_ELEMENT (stmt_info)
2364 && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2de001ee
RS
2365 {
2366 if (dump_enabled_p ())
2367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368 "not falling back to elementwise accesses\n");
2369 return false;
2370 }
2371 return true;
2372}
2373
aaeefd88 2374/* Return true if boolean argument MASK is suitable for vectorizing
929b4411
RS
2375 conditional load or store STMT. When returning true, store the type
2376 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2377 in *MASK_VECTYPE_OUT. */
aaeefd88
RS
2378
2379static bool
929b4411
RS
2380vect_check_load_store_mask (gimple *stmt, tree mask,
2381 vect_def_type *mask_dt_out,
2382 tree *mask_vectype_out)
aaeefd88
RS
2383{
2384 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2385 {
2386 if (dump_enabled_p ())
2387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2388 "mask argument is not a boolean.\n");
2389 return false;
2390 }
2391
2392 if (TREE_CODE (mask) != SSA_NAME)
2393 {
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 "mask argument is not an SSA name.\n");
2397 return false;
2398 }
2399
2400 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2401 gimple *def_stmt;
929b4411 2402 enum vect_def_type mask_dt;
aaeefd88 2403 tree mask_vectype;
929b4411 2404 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
aaeefd88
RS
2405 &mask_vectype))
2406 {
2407 if (dump_enabled_p ())
2408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 "mask use not simple.\n");
2410 return false;
2411 }
2412
2413 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2414 if (!mask_vectype)
2415 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2416
2417 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2418 {
2419 if (dump_enabled_p ())
2420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421 "could not find an appropriate vector mask type.\n");
2422 return false;
2423 }
2424
2425 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2426 TYPE_VECTOR_SUBPARTS (vectype)))
2427 {
2428 if (dump_enabled_p ())
2429 {
2430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2431 "vector mask type ");
2432 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2433 dump_printf (MSG_MISSED_OPTIMIZATION,
2434 " does not match vector data type ");
2435 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2436 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2437 }
2438 return false;
2439 }
2440
929b4411 2441 *mask_dt_out = mask_dt;
aaeefd88
RS
2442 *mask_vectype_out = mask_vectype;
2443 return true;
2444}
2445
3133c3b6
RS
2446/* Return true if stored value RHS is suitable for vectorizing store
2447 statement STMT. When returning true, store the type of the
929b4411
RS
2448 definition in *RHS_DT_OUT, the type of the vectorized store value in
2449 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
3133c3b6
RS
2450
2451static bool
929b4411
RS
2452vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2453 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
3133c3b6
RS
2454{
2455 /* If this is a store from a constant, make sure
2456 native_encode_expr can handle it. */
2457 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2458 {
2459 if (dump_enabled_p ())
2460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461 "cannot encode constant as a byte sequence.\n");
2462 return false;
2463 }
2464
2465 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2466 gimple *def_stmt;
929b4411 2467 enum vect_def_type rhs_dt;
3133c3b6 2468 tree rhs_vectype;
929b4411 2469 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
3133c3b6
RS
2470 &rhs_vectype))
2471 {
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474 "use not simple.\n");
2475 return false;
2476 }
2477
2478 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2479 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2480 {
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 "incompatible vector types.\n");
2484 return false;
2485 }
2486
929b4411 2487 *rhs_dt_out = rhs_dt;
3133c3b6 2488 *rhs_vectype_out = rhs_vectype;
929b4411 2489 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
3133c3b6
RS
2490 *vls_type_out = VLS_STORE_INVARIANT;
2491 else
2492 *vls_type_out = VLS_STORE;
2493 return true;
2494}
2495
bc9587eb
RS
2496/* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2497 Note that we support masks with floating-point type, in which case the
2498 floats are interpreted as a bitmask. */
2499
2500static tree
2501vect_build_all_ones_mask (gimple *stmt, tree masktype)
2502{
2503 if (TREE_CODE (masktype) == INTEGER_TYPE)
2504 return build_int_cst (masktype, -1);
2505 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2506 {
2507 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2508 mask = build_vector_from_val (masktype, mask);
2509 return vect_init_vector (stmt, mask, masktype, NULL);
2510 }
2511 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2512 {
2513 REAL_VALUE_TYPE r;
2514 long tmp[6];
2515 for (int j = 0; j < 6; ++j)
2516 tmp[j] = -1;
2517 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2518 tree mask = build_real (TREE_TYPE (masktype), r);
2519 mask = build_vector_from_val (masktype, mask);
2520 return vect_init_vector (stmt, mask, masktype, NULL);
2521 }
2522 gcc_unreachable ();
2523}
2524
2525/* Build an all-zero merge value of type VECTYPE while vectorizing
2526 STMT as a gather load. */
2527
2528static tree
2529vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2530{
2531 tree merge;
2532 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2533 merge = build_int_cst (TREE_TYPE (vectype), 0);
2534 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2535 {
2536 REAL_VALUE_TYPE r;
2537 long tmp[6];
2538 for (int j = 0; j < 6; ++j)
2539 tmp[j] = 0;
2540 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2541 merge = build_real (TREE_TYPE (vectype), r);
2542 }
2543 else
2544 gcc_unreachable ();
2545 merge = build_vector_from_val (vectype, merge);
2546 return vect_init_vector (stmt, merge, vectype, NULL);
2547}
2548
c48d2d35
RS
2549/* Build a gather load call while vectorizing STMT. Insert new instructions
2550 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2551 operation. If the load is conditional, MASK is the unvectorized
929b4411 2552 condition and MASK_DT is its definition type, otherwise MASK is null. */
c48d2d35
RS
2553
2554static void
2555vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2556 gimple **vec_stmt, gather_scatter_info *gs_info,
929b4411 2557 tree mask, vect_def_type mask_dt)
c48d2d35
RS
2558{
2559 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2560 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2561 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2562 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2563 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2564 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2565 edge pe = loop_preheader_edge (loop);
2566 enum { NARROW, NONE, WIDEN } modifier;
2567 poly_uint64 gather_off_nunits
2568 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2569
2570 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2571 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2572 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2573 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2574 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2575 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2576 tree scaletype = TREE_VALUE (arglist);
2577 gcc_checking_assert (types_compatible_p (srctype, rettype)
2578 && (!mask || types_compatible_p (srctype, masktype)));
2579
2580 tree perm_mask = NULL_TREE;
2581 tree mask_perm_mask = NULL_TREE;
2582 if (known_eq (nunits, gather_off_nunits))
2583 modifier = NONE;
2584 else if (known_eq (nunits * 2, gather_off_nunits))
2585 {
2586 modifier = WIDEN;
2587
2588 /* Currently widening gathers and scatters are only supported for
2589 fixed-length vectors. */
2590 int count = gather_off_nunits.to_constant ();
2591 vec_perm_builder sel (count, count, 1);
2592 for (int i = 0; i < count; ++i)
2593 sel.quick_push (i | (count / 2));
2594
2595 vec_perm_indices indices (sel, 1, count);
2596 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2597 indices);
2598 }
2599 else if (known_eq (nunits, gather_off_nunits * 2))
2600 {
2601 modifier = NARROW;
2602
2603 /* Currently narrowing gathers and scatters are only supported for
2604 fixed-length vectors. */
2605 int count = nunits.to_constant ();
2606 vec_perm_builder sel (count, count, 1);
2607 sel.quick_grow (count);
2608 for (int i = 0; i < count; ++i)
2609 sel[i] = i < count / 2 ? i : i + count / 2;
2610 vec_perm_indices indices (sel, 2, count);
2611 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2612
2613 ncopies *= 2;
2614
2615 if (mask)
2616 {
2617 for (int i = 0; i < count; ++i)
2618 sel[i] = i | (count / 2);
2619 indices.new_vector (sel, 2, count);
2620 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2621 }
2622 }
2623 else
2624 gcc_unreachable ();
2625
2626 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2627 vectype);
2628
2629 tree ptr = fold_convert (ptrtype, gs_info->base);
2630 if (!is_gimple_min_invariant (ptr))
2631 {
2632 gimple_seq seq;
2633 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2634 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2635 gcc_assert (!new_bb);
2636 }
2637
2638 tree scale = build_int_cst (scaletype, gs_info->scale);
2639
2640 tree vec_oprnd0 = NULL_TREE;
2641 tree vec_mask = NULL_TREE;
2642 tree src_op = NULL_TREE;
2643 tree mask_op = NULL_TREE;
2644 tree prev_res = NULL_TREE;
2645 stmt_vec_info prev_stmt_info = NULL;
2646
2647 if (!mask)
2648 {
2649 src_op = vect_build_zero_merge_argument (stmt, rettype);
2650 mask_op = vect_build_all_ones_mask (stmt, masktype);
2651 }
2652
2653 for (int j = 0; j < ncopies; ++j)
2654 {
2655 tree op, var;
2656 gimple *new_stmt;
2657 if (modifier == WIDEN && (j & 1))
2658 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2659 perm_mask, stmt, gsi);
2660 else if (j == 0)
2661 op = vec_oprnd0
2662 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2663 else
2664 op = vec_oprnd0
2665 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2666
2667 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2668 {
2669 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2670 TYPE_VECTOR_SUBPARTS (idxtype)));
2671 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2672 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2673 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2674 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2675 op = var;
2676 }
2677
2678 if (mask)
2679 {
2680 if (mask_perm_mask && (j & 1))
2681 mask_op = permute_vec_elements (mask_op, mask_op,
2682 mask_perm_mask, stmt, gsi);
2683 else
2684 {
2685 if (j == 0)
2686 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2687 else
929b4411 2688 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c48d2d35
RS
2689
2690 mask_op = vec_mask;
2691 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2692 {
2693 gcc_assert
2694 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2695 TYPE_VECTOR_SUBPARTS (masktype)));
2696 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2697 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2698 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2699 mask_op);
2700 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2701 mask_op = var;
2702 }
2703 }
2704 src_op = mask_op;
2705 }
2706
2707 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2708 mask_op, scale);
2709
2710 if (!useless_type_conversion_p (vectype, rettype))
2711 {
2712 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2713 TYPE_VECTOR_SUBPARTS (rettype)));
2714 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2715 gimple_call_set_lhs (new_stmt, op);
2716 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2717 var = make_ssa_name (vec_dest);
2718 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2719 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2720 }
2721 else
2722 {
2723 var = make_ssa_name (vec_dest, new_stmt);
2724 gimple_call_set_lhs (new_stmt, var);
2725 }
2726
2727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728
2729 if (modifier == NARROW)
2730 {
2731 if ((j & 1) == 0)
2732 {
2733 prev_res = var;
2734 continue;
2735 }
2736 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2737 new_stmt = SSA_NAME_DEF_STMT (var);
2738 }
2739
2740 if (prev_stmt_info == NULL)
2741 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2742 else
2743 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2744 prev_stmt_info = vinfo_for_stmt (new_stmt);
2745 }
2746}
2747
bfaa08b7
RS
2748/* Prepare the base and offset in GS_INFO for vectorization.
2749 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2750 to the vectorized offset argument for the first copy of STMT. STMT
2751 is the statement described by GS_INFO and LOOP is the containing loop. */
2752
2753static void
2754vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2755 gather_scatter_info *gs_info,
2756 tree *dataref_ptr, tree *vec_offset)
2757{
2758 gimple_seq stmts = NULL;
2759 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2760 if (stmts != NULL)
2761 {
2762 basic_block new_bb;
2763 edge pe = loop_preheader_edge (loop);
2764 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2765 gcc_assert (!new_bb);
2766 }
2767 tree offset_type = TREE_TYPE (gs_info->offset);
2768 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2769 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2770 offset_vectype);
2771}
2772
ab2fc782
RS
2773/* Prepare to implement a grouped or strided load or store using
2774 the gather load or scatter store operation described by GS_INFO.
2775 STMT is the load or store statement.
2776
2777 Set *DATAREF_BUMP to the amount that should be added to the base
2778 address after each copy of the vectorized statement. Set *VEC_OFFSET
2779 to an invariant offset vector in which element I has the value
2780 I * DR_STEP / SCALE. */
2781
2782static void
2783vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2784 gather_scatter_info *gs_info,
2785 tree *dataref_bump, tree *vec_offset)
2786{
2787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2788 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2789 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2790 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2791 gimple_seq stmts;
2792
2793 tree bump = size_binop (MULT_EXPR,
2794 fold_convert (sizetype, DR_STEP (dr)),
2795 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2796 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2797 if (stmts)
2798 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2799
2800 /* The offset given in GS_INFO can have pointer type, so use the element
2801 type of the vector instead. */
2802 tree offset_type = TREE_TYPE (gs_info->offset);
2803 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2804 offset_type = TREE_TYPE (offset_vectype);
2805
2806 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2807 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2808 ssize_int (gs_info->scale));
2809 step = fold_convert (offset_type, step);
2810 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2811
2812 /* Create {0, X, X*2, X*3, ...}. */
2813 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2814 build_zero_cst (offset_type), step);
2815 if (stmts)
2816 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2817}
2818
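/* Worked example (illustrative): with DR_STEP == 8 bytes, SCALE == 4
   and 4-element vectors, X = 8 / 4 = 2, so *VEC_OFFSET becomes
   { 0, 2, 4, 6 } and *DATAREF_BUMP is 8 * 4 == 32 bytes per copy.  */
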
2819/* Return the amount that should be added to a vector pointer to move
2820 to the next or previous copy of AGGR_TYPE. DR is the data reference
2821 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2822 vectorization. */
2823
2824static tree
2825vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2826 vect_memory_access_type memory_access_type)
2827{
2828 if (memory_access_type == VMAT_INVARIANT)
2829 return size_zero_node;
2830
2831 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2832 tree step = vect_dr_behavior (dr)->step;
2833 if (tree_int_cst_sgn (step) == -1)
2834 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2835 return iv_step;
2836}
2837
37b14185
RB
2838/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2839
2840static bool
2841vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2842 gimple **vec_stmt, slp_tree slp_node,
2843 tree vectype_in, enum vect_def_type *dt)
2844{
2845 tree op, vectype;
2846 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2847 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
928686b1
RS
2848 unsigned ncopies;
2849 unsigned HOST_WIDE_INT nunits, num_bytes;
37b14185
RB
2850
2851 op = gimple_call_arg (stmt, 0);
2852 vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1
RS
2853
2854 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2855 return false;
37b14185
RB
2856
2857 /* Multiple types in SLP are handled by creating the appropriate number of
2858 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2859 case of SLP. */
2860 if (slp_node)
2861 ncopies = 1;
2862 else
e8f142e2 2863 ncopies = vect_get_num_copies (loop_vinfo, vectype);
37b14185
RB
2864
2865 gcc_assert (ncopies >= 1);
2866
2867 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2868 if (! char_vectype)
2869 return false;
2870
928686b1
RS
2871 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2872 return false;
2873
794e3180 2874 unsigned word_bytes = num_bytes / nunits;
908a1a16 2875
d980067b
RS
2876 /* The encoding uses one stepped pattern for each byte in the word. */
2877 vec_perm_builder elts (num_bytes, word_bytes, 3);
2878 for (unsigned i = 0; i < 3; ++i)
37b14185 2879 for (unsigned j = 0; j < word_bytes; ++j)
908a1a16 2880 elts.quick_push ((i + 1) * word_bytes - j - 1);
37b14185 2881
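 /* For example (illustrative): a 32-bit bswap on a 16-byte vector has
 word_bytes == 4, so the 12 values pushed above,
 { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, encode four interleaved
 stepped patterns that the encoding extends to all 16 bytes. */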
e3342de4
RS
2882 vec_perm_indices indices (elts, 1, num_bytes);
2883 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
37b14185
RB
2884 return false;
2885
2886 if (! vec_stmt)
2887 {
2888 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2889 if (dump_enabled_p ())
2890 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2891 "\n");
78604de0 2892 if (! slp_node)
37b14185
RB
2893 {
2894 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2895 1, vector_stmt, stmt_info, 0, vect_prologue);
2896 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2897 ncopies, vec_perm, stmt_info, 0, vect_body);
2898 }
2899 return true;
2900 }
2901
736d0f28 2902 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
37b14185
RB
2903
2904 /* Transform. */
2905 vec<tree> vec_oprnds = vNULL;
2906 gimple *new_stmt = NULL;
2907 stmt_vec_info prev_stmt_info = NULL;
2908 for (unsigned j = 0; j < ncopies; j++)
2909 {
2910 /* Handle uses. */
2911 if (j == 0)
306b0c92 2912 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
37b14185
RB
2913 else
2914 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2915
2916 /* Arguments are ready. Create the new vector stmt. */
2917 unsigned i;
2918 tree vop;
2919 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2920 {
2921 tree tem = make_ssa_name (char_vectype);
2922 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2923 char_vectype, vop));
2924 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2925 tree tem2 = make_ssa_name (char_vectype);
2926 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2927 tem, tem, bswap_vconst);
2928 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2929 tem = make_ssa_name (vectype);
2930 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2931 vectype, tem2));
2932 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933 if (slp_node)
2934 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2935 }
2936
2937 if (slp_node)
2938 continue;
2939
2940 if (j == 0)
2941 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2942 else
2943 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2944
2945 prev_stmt_info = vinfo_for_stmt (new_stmt);
2946 }
2947
2948 vec_oprnds.release ();
2949 return true;
2950}
2951
b1b6836e
RS
2952/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2953 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2954 in a single step. On success, store the binary pack code in
2955 *CONVERT_CODE. */
2956
2957static bool
2958simple_integer_narrowing (tree vectype_out, tree vectype_in,
2959 tree_code *convert_code)
2960{
2961 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2962 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2963 return false;
2964
2965 tree_code code;
2966 int multi_step_cvt = 0;
2967 auto_vec <tree, 8> interm_types;
2968 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2969 &code, &multi_step_cvt,
2970 &interm_types)
2971 || multi_step_cvt)
2972 return false;
2973
2974 *convert_code = code;
2975 return true;
2976}
5ce9450f 2977
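/* Illustrative example: narrowing V4SI to V8HI is a single
   VEC_PACK_TRUNC_EXPR step, so *CONVERT_CODE is set and the function
   returns true, whereas V2DI to V8HI would need an intermediate type
   and therefore fails the MULTI_STEP_CVT check.  */
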
ebfd146a
IR
2978/* Function vectorizable_call.
2979
538dd0b7 2980 Check if GS performs a function call that can be vectorized.
b8698a0f 2981 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2982 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2983 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2984
2985static bool
355fe088 2986vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
190c2236 2987 slp_tree slp_node)
ebfd146a 2988{
538dd0b7 2989 gcall *stmt;
ebfd146a
IR
2990 tree vec_dest;
2991 tree scalar_dest;
2992 tree op, type;
2993 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
538dd0b7 2994 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
ebfd146a 2995 tree vectype_out, vectype_in;
c7bda0f4
RS
2996 poly_uint64 nunits_in;
2997 poly_uint64 nunits_out;
ebfd146a 2998 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
190c2236 2999 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3000 vec_info *vinfo = stmt_info->vinfo;
81c40241 3001 tree fndecl, new_temp, rhs_type;
355fe088 3002 gimple *def_stmt;
0502fb85
UB
3003 enum vect_def_type dt[3]
3004 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 3005 int ndts = 3;
355fe088 3006 gimple *new_stmt = NULL;
ebfd146a 3007 int ncopies, j;
6e1aa848 3008 vec<tree> vargs = vNULL;
ebfd146a
IR
3009 enum { NARROW, NONE, WIDEN } modifier;
3010 size_t i, nargs;
9d5e7640 3011 tree lhs;
ebfd146a 3012
190c2236 3013 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
3014 return false;
3015
66c16fd9
RB
3016 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3017 && ! vec_stmt)
ebfd146a
IR
3018 return false;
3019
538dd0b7
DM
3020 /* Is GS a vectorizable call? */
3021 stmt = dyn_cast <gcall *> (gs);
3022 if (!stmt)
ebfd146a
IR
3023 return false;
3024
5ce9450f 3025 if (gimple_call_internal_p (stmt)
bfaa08b7 3026 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
f307441a 3027 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
c3a8f964
RS
3028 /* Handled by vectorizable_load and vectorizable_store. */
3029 return false;
5ce9450f 3030
0136f8f0
AH
3031 if (gimple_call_lhs (stmt) == NULL_TREE
3032 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
ebfd146a
IR
3033 return false;
3034
0136f8f0 3035 gcc_checking_assert (!stmt_can_throw_internal (stmt));
5a2c1986 3036
b690cc0f
RG
3037 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3038
ebfd146a
IR
3039 /* Process function arguments. */
3040 rhs_type = NULL_TREE;
b690cc0f 3041 vectype_in = NULL_TREE;
ebfd146a
IR
3042 nargs = gimple_call_num_args (stmt);
3043
1b1562a5
MM
3044 /* Bail out if the function has more than three arguments; we do not have
3045 interesting builtin functions to vectorize with more than two arguments,
3046 except for fma. Zero arguments is not supported either. */
3047 if (nargs == 0 || nargs > 3)
ebfd146a
IR
3048 return false;
3049
74bf76ed
JJ
 3050	  /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
3051 if (gimple_call_internal_p (stmt)
3052 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3053 {
3054 nargs = 0;
3055 rhs_type = unsigned_type_node;
3056 }
3057
ebfd146a
IR
3058 for (i = 0; i < nargs; i++)
3059 {
b690cc0f
RG
3060 tree opvectype;
3061
ebfd146a
IR
3062 op = gimple_call_arg (stmt, i);
3063
3064 /* We can only handle calls with arguments of the same type. */
3065 if (rhs_type
8533c9d8 3066 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
ebfd146a 3067 {
73fbfcad 3068 if (dump_enabled_p ())
78c60e3d 3069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3070 "argument types differ.\n");
ebfd146a
IR
3071 return false;
3072 }
b690cc0f
RG
3073 if (!rhs_type)
3074 rhs_type = TREE_TYPE (op);
ebfd146a 3075
81c40241 3076 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
ebfd146a 3077 {
73fbfcad 3078 if (dump_enabled_p ())
78c60e3d 3079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3080 "use not simple.\n");
ebfd146a
IR
3081 return false;
3082 }
ebfd146a 3083
b690cc0f
RG
3084 if (!vectype_in)
3085 vectype_in = opvectype;
3086 else if (opvectype
3087 && opvectype != vectype_in)
3088 {
73fbfcad 3089 if (dump_enabled_p ())
78c60e3d 3090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3091 "argument vector types differ.\n");
b690cc0f
RG
3092 return false;
3093 }
3094 }
3095 /* If all arguments are external or constant defs use a vector type with
3096 the same size as the output vector type. */
ebfd146a 3097 if (!vectype_in)
b690cc0f 3098 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3099 if (vec_stmt)
3100 gcc_assert (vectype_in);
3101 if (!vectype_in)
3102 {
73fbfcad 3103 if (dump_enabled_p ())
7d8930a0 3104 {
78c60e3d
SS
3105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106 "no vectype for scalar type ");
3107 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3108 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
3109 }
3110
3111 return false;
3112 }
ebfd146a
IR
3113
3114 /* FORNOW */
b690cc0f
RG
3115 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3116 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
c7bda0f4 3117 if (known_eq (nunits_in * 2, nunits_out))
ebfd146a 3118 modifier = NARROW;
c7bda0f4 3119 else if (known_eq (nunits_out, nunits_in))
ebfd146a 3120 modifier = NONE;
c7bda0f4 3121 else if (known_eq (nunits_out * 2, nunits_in))
ebfd146a
IR
3122 modifier = WIDEN;
3123 else
3124 return false;
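
/* For illustration: with V4SI inputs and a V8HI output the result
   vector holds twice as many elements, so two vectorized calls feed
   each result vector (NARROW); with V8HI inputs and a V4SI output
   each input vector is spread over two vectorized results (WIDEN).
   Other element-count ratios are not handled.  */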
3125
70439f0d
RS
3126 /* We only handle functions that do not read or clobber memory. */
3127 if (gimple_vuse (stmt))
3128 {
3129 if (dump_enabled_p ())
3130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131 "function reads from or writes to memory.\n");
3132 return false;
3133 }
3134
ebfd146a
IR
 3135	  /* For now, we only vectorize functions if a target-specific builtin
3136 is available. TODO -- in some cases, it might be profitable to
3137 insert the calls for pieces of the vector, in order to be able
3138 to vectorize other operations in the loop. */
70439f0d
RS
3139 fndecl = NULL_TREE;
3140 internal_fn ifn = IFN_LAST;
3141 combined_fn cfn = gimple_call_combined_fn (stmt);
3142 tree callee = gimple_call_fndecl (stmt);
3143
3144 /* First try using an internal function. */
b1b6836e
RS
3145 tree_code convert_code = ERROR_MARK;
3146 if (cfn != CFN_LAST
3147 && (modifier == NONE
3148 || (modifier == NARROW
3149 && simple_integer_narrowing (vectype_out, vectype_in,
3150 &convert_code))))
70439f0d
RS
3151 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3152 vectype_in);
3153
3154 /* If that fails, try asking for a target-specific built-in function. */
3155 if (ifn == IFN_LAST)
3156 {
3157 if (cfn != CFN_LAST)
3158 fndecl = targetm.vectorize.builtin_vectorized_function
3159 (cfn, vectype_out, vectype_in);
7672aa9b 3160 else if (callee)
70439f0d
RS
3161 fndecl = targetm.vectorize.builtin_md_vectorized_function
3162 (callee, vectype_out, vectype_in);
3163 }
3164
3165 if (ifn == IFN_LAST && !fndecl)
ebfd146a 3166 {
70439f0d 3167 if (cfn == CFN_GOMP_SIMD_LANE
74bf76ed
JJ
3168 && !slp_node
3169 && loop_vinfo
3170 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3171 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3172 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3173 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3174 {
3175 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3176 { 0, 1, 2, ... vf - 1 } vector. */
3177 gcc_assert (nargs == 0);
3178 }
37b14185
RB
3179 else if (modifier == NONE
3180 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3181 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3182 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3183 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3184 vectype_in, dt);
74bf76ed
JJ
3185 else
3186 {
3187 if (dump_enabled_p ())
3188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3189 "function is not vectorizable.\n");
74bf76ed
JJ
3190 return false;
3191 }
ebfd146a
IR
3192 }
3193
fce57248 3194 if (slp_node)
190c2236 3195 ncopies = 1;
b1b6836e 3196 else if (modifier == NARROW && ifn == IFN_LAST)
e8f142e2 3197 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
ebfd146a 3198 else
e8f142e2 3199 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
ebfd146a
IR
3200
3201 /* Sanity check: make sure that at least one copy of the vectorized stmt
3202 needs to be generated. */
3203 gcc_assert (ncopies >= 1);
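
/* For illustration: with VF == 8 and V4SI inputs, ncopies is
   8 / 4 == 2.  In the NARROW case without an internal function the
   count is taken from the output type instead: each narrowing call
   consumes two input vectors, so half as many copies are needed.  */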
3204
3205 if (!vec_stmt) /* transformation not required. */
3206 {
3207 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
73fbfcad 3208 if (dump_enabled_p ())
e645e942
TJ
3209 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3210 "\n");
78604de0
RB
3211 if (!slp_node)
3212 {
3213 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
 3214	      if (ifn != IFN_LAST && modifier == NARROW)
3215 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3216 vec_promote_demote, stmt_info, 0, vect_body);
3217 }
b1b6836e 3218
ebfd146a
IR
3219 return true;
3220 }
3221
67b8dbac 3222 /* Transform. */
ebfd146a 3223
73fbfcad 3224 if (dump_enabled_p ())
e645e942 3225 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
ebfd146a
IR
3226
3227 /* Handle def. */
3228 scalar_dest = gimple_call_lhs (stmt);
3229 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3230
3231 prev_stmt_info = NULL;
b1b6836e 3232 if (modifier == NONE || ifn != IFN_LAST)
ebfd146a 3233 {
b1b6836e 3234 tree prev_res = NULL_TREE;
ebfd146a
IR
3235 for (j = 0; j < ncopies; ++j)
3236 {
3237 /* Build argument list for the vectorized call. */
3238 if (j == 0)
9771b263 3239 vargs.create (nargs);
ebfd146a 3240 else
9771b263 3241 vargs.truncate (0);
ebfd146a 3242
190c2236
JJ
3243 if (slp_node)
3244 {
ef062b13 3245 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3246 vec<tree> vec_oprnds0;
190c2236
JJ
3247
3248 for (i = 0; i < nargs; i++)
9771b263 3249 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3250 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3251 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3252
3253 /* Arguments are ready. Create the new vector stmt. */
9771b263 3254 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
190c2236
JJ
3255 {
3256 size_t k;
3257 for (k = 0; k < nargs; k++)
3258 {
37b5ec8f 3259 vec<tree> vec_oprndsk = vec_defs[k];
9771b263 3260 vargs[k] = vec_oprndsk[i];
190c2236 3261 }
b1b6836e
RS
3262 if (modifier == NARROW)
3263 {
3264 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3265 gcall *call
3266 = gimple_build_call_internal_vec (ifn, vargs);
3267 gimple_call_set_lhs (call, half_res);
3268 gimple_call_set_nothrow (call, true);
3269 new_stmt = call;
b1b6836e
RS
3270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3271 if ((i & 1) == 0)
3272 {
3273 prev_res = half_res;
3274 continue;
3275 }
3276 new_temp = make_ssa_name (vec_dest);
3277 new_stmt = gimple_build_assign (new_temp, convert_code,
3278 prev_res, half_res);
3279 }
70439f0d 3280 else
b1b6836e 3281 {
a844293d 3282 gcall *call;
b1b6836e 3283 if (ifn != IFN_LAST)
a844293d 3284 call = gimple_build_call_internal_vec (ifn, vargs);
b1b6836e 3285 else
a844293d
RS
3286 call = gimple_build_call_vec (fndecl, vargs);
3287 new_temp = make_ssa_name (vec_dest, call);
3288 gimple_call_set_lhs (call, new_temp);
3289 gimple_call_set_nothrow (call, true);
3290 new_stmt = call;
b1b6836e 3291 }
190c2236 3292 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3293 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3294 }
3295
3296 for (i = 0; i < nargs; i++)
3297 {
37b5ec8f 3298 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3299 vec_oprndsi.release ();
190c2236 3300 }
190c2236
JJ
3301 continue;
3302 }
3303
ebfd146a
IR
3304 for (i = 0; i < nargs; i++)
3305 {
3306 op = gimple_call_arg (stmt, i);
3307 if (j == 0)
3308 vec_oprnd0
81c40241 3309 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3310 else
63827fb8
IR
3311 {
3312 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3313 vec_oprnd0
3314 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3315 }
ebfd146a 3316
9771b263 3317 vargs.quick_push (vec_oprnd0);
ebfd146a
IR
3318 }
3319
74bf76ed
JJ
3320 if (gimple_call_internal_p (stmt)
3321 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3322 {
c7bda0f4 3323 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
74bf76ed 3324 tree new_var
0e22bb5a 3325 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
355fe088 3326 gimple *init_stmt = gimple_build_assign (new_var, cst);
74bf76ed 3327 vect_init_vector_1 (stmt, init_stmt, NULL);
b731b390 3328 new_temp = make_ssa_name (vec_dest);
0e22bb5a 3329 new_stmt = gimple_build_assign (new_temp, new_var);
74bf76ed 3330 }
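
/* For illustration of the IFN_GOMP_SIMD_LANE expansion above, with
   nunits_out == 4: copy j == 0 materializes the constant
   { 0, 1, 2, 3 } and copy j == 1 materializes { 4, 5, 6, 7 }, so
   the copies together form { 0, 1, 2, ... vf - 1 }.  */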
b1b6836e
RS
3331 else if (modifier == NARROW)
3332 {
3333 tree half_res = make_ssa_name (vectype_in);
a844293d
RS
3334 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3335 gimple_call_set_lhs (call, half_res);
3336 gimple_call_set_nothrow (call, true);
3337 new_stmt = call;
b1b6836e
RS
3338 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3339 if ((j & 1) == 0)
3340 {
3341 prev_res = half_res;
3342 continue;
3343 }
3344 new_temp = make_ssa_name (vec_dest);
3345 new_stmt = gimple_build_assign (new_temp, convert_code,
3346 prev_res, half_res);
3347 }
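
/* In the NARROW path above each internal-fn call computes only half
   of the elements of VEC_DEST, so copies are consumed in pairs:
   even copies stash their half in PREV_RES and continue, and odd
   copies combine PREV_RES with the new half using CONVERT_CODE
   (e.g. VEC_PACK_TRUNC_EXPR from simple_integer_narrowing).  */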
74bf76ed
JJ
3348 else
3349 {
a844293d 3350 gcall *call;
70439f0d 3351 if (ifn != IFN_LAST)
a844293d 3352 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3353 else
a844293d 3354 call = gimple_build_call_vec (fndecl, vargs);
74bf76ed 3355	      new_temp = make_ssa_name (vec_dest, call);
a844293d
RS
3356 gimple_call_set_lhs (call, new_temp);
3357 gimple_call_set_nothrow (call, true);
3358 new_stmt = call;
74bf76ed 3359 }
ebfd146a
IR
3360 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361
b1b6836e 3362 if (j == (modifier == NARROW ? 1 : 0))
ebfd146a
IR
3363 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3364 else
3365 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3366
3367 prev_stmt_info = vinfo_for_stmt (new_stmt);
3368 }
b1b6836e
RS
3369 }
3370 else if (modifier == NARROW)
3371 {
ebfd146a
IR
3372 for (j = 0; j < ncopies; ++j)
3373 {
3374 /* Build argument list for the vectorized call. */
3375 if (j == 0)
9771b263 3376 vargs.create (nargs * 2);
ebfd146a 3377 else
9771b263 3378 vargs.truncate (0);
ebfd146a 3379
190c2236
JJ
3380 if (slp_node)
3381 {
ef062b13 3382 auto_vec<vec<tree> > vec_defs (nargs);
9771b263 3383 vec<tree> vec_oprnds0;
190c2236
JJ
3384
3385 for (i = 0; i < nargs; i++)
9771b263 3386 vargs.quick_push (gimple_call_arg (stmt, i));
306b0c92 3387 vect_get_slp_defs (vargs, slp_node, &vec_defs);
37b5ec8f 3388 vec_oprnds0 = vec_defs[0];
190c2236
JJ
3389
3390 /* Arguments are ready. Create the new vector stmt. */
9771b263 3391 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
190c2236
JJ
3392 {
3393 size_t k;
9771b263 3394 vargs.truncate (0);
190c2236
JJ
3395 for (k = 0; k < nargs; k++)
3396 {
37b5ec8f 3397 vec<tree> vec_oprndsk = vec_defs[k];
9771b263
DN
3398 vargs.quick_push (vec_oprndsk[i]);
3399 vargs.quick_push (vec_oprndsk[i + 1]);
190c2236 3400 }
a844293d 3401 gcall *call;
70439f0d 3402 if (ifn != IFN_LAST)
a844293d 3403 call = gimple_build_call_internal_vec (ifn, vargs);
70439f0d 3404 else
a844293d
RS
3405 call = gimple_build_call_vec (fndecl, vargs);
3406 new_temp = make_ssa_name (vec_dest, call);
3407 gimple_call_set_lhs (call, new_temp);
3408 gimple_call_set_nothrow (call, true);
3409 new_stmt = call;
190c2236 3410 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 3411 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
190c2236
JJ
3412 }
3413
3414 for (i = 0; i < nargs; i++)
3415 {
37b5ec8f 3416 vec<tree> vec_oprndsi = vec_defs[i];
9771b263 3417 vec_oprndsi.release ();
190c2236 3418 }
190c2236
JJ
3419 continue;
3420 }
3421
ebfd146a
IR
3422 for (i = 0; i < nargs; i++)
3423 {
3424 op = gimple_call_arg (stmt, i);
3425 if (j == 0)
3426 {
3427 vec_oprnd0
81c40241 3428 = vect_get_vec_def_for_operand (op, stmt);
ebfd146a 3429 vec_oprnd1
63827fb8 3430 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3431 }
3432 else
3433 {
336ecb65 3434 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
ebfd146a 3435 vec_oprnd0
63827fb8 3436 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
ebfd146a 3437 vec_oprnd1
63827fb8 3438 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
ebfd146a
IR
3439 }
3440
9771b263
DN
3441 vargs.quick_push (vec_oprnd0);
3442 vargs.quick_push (vec_oprnd1);
ebfd146a
IR
3443 }
3444
b1b6836e 3445 new_stmt = gimple_build_call_vec (fndecl, vargs);
ebfd146a
IR
3446 new_temp = make_ssa_name (vec_dest, new_stmt);
3447 gimple_call_set_lhs (new_stmt, new_temp);
ebfd146a
IR
3448 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3449
3450 if (j == 0)
3451 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3452 else
3453 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3454
3455 prev_stmt_info = vinfo_for_stmt (new_stmt);
3456 }
3457
3458 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a 3459 }
b1b6836e
RS
3460 else
3461 /* No current target implements this case. */
3462 return false;
ebfd146a 3463
9771b263 3464 vargs.release ();
ebfd146a 3465
ebfd146a
IR
 3466	  /* The call in STMT might prevent it from being removed in DCE.
 3467	     We cannot remove it here, however, because of the way the SSA name
 3468	     it defines is mapped to the new definition.  So just replace
 3469	     the rhs of the statement with something harmless.  */
3470
dd34c087
JJ
3471 if (slp_node)
3472 return true;
3473
ebfd146a 3474 type = TREE_TYPE (scalar_dest);
9d5e7640
IR
3475 if (is_pattern_stmt_p (stmt_info))
3476 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3477 else
3478 lhs = gimple_call_lhs (stmt);
3cc2fa2a 3479
9d5e7640 3480 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
ebfd146a 3481 set_vinfo_for_stmt (new_stmt, stmt_info);
dd34c087 3482 set_vinfo_for_stmt (stmt, NULL);
ebfd146a
IR
3483 STMT_VINFO_STMT (stmt_info) = new_stmt;
3484 gsi_replace (gsi, new_stmt, false);
ebfd146a
IR
3485
3486 return true;
3487}
3488
3489
0136f8f0
AH
 3490struct simd_call_arg_info
 3491{
 3492  tree vectype;			/* Vector type of the argument, if any.  */
 3493  tree op;				/* Argument value; base for linear args.  */
0136f8f0 3494  HOST_WIDE_INT linear_step;	/* Step of a linear argument, else 0.  */
34e82342 3495  enum vect_def_type dt;	/* Def type from vect_is_simple_use.  */
0136f8f0 3496  unsigned int align;		/* Pointer alignment in bytes, else 0.  */
17b658af 3497  bool simd_lane_linear;	/* Linear only within a simd lane?  */
0136f8f0
AH
 3498};
3499
17b658af
JJ
3500/* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3501 is linear within simd lane (but not within whole loop), note it in
3502 *ARGINFO. */
3503
3504static void
3505vect_simd_lane_linear (tree op, struct loop *loop,
3506 struct simd_call_arg_info *arginfo)
3507{
355fe088 3508 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
17b658af
JJ
3509
3510 if (!is_gimple_assign (def_stmt)
3511 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3512 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3513 return;
3514
3515 tree base = gimple_assign_rhs1 (def_stmt);
3516 HOST_WIDE_INT linear_step = 0;
3517 tree v = gimple_assign_rhs2 (def_stmt);
3518 while (TREE_CODE (v) == SSA_NAME)
3519 {
3520 tree t;
3521 def_stmt = SSA_NAME_DEF_STMT (v);
3522 if (is_gimple_assign (def_stmt))
3523 switch (gimple_assign_rhs_code (def_stmt))
3524 {
3525 case PLUS_EXPR:
3526 t = gimple_assign_rhs2 (def_stmt);
3527 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3528 return;
3529 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3530 v = gimple_assign_rhs1 (def_stmt);
3531 continue;
3532 case MULT_EXPR:
3533 t = gimple_assign_rhs2 (def_stmt);
3534 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3535 return;
3536 linear_step = tree_to_shwi (t);
3537 v = gimple_assign_rhs1 (def_stmt);
3538 continue;
3539 CASE_CONVERT:
3540 t = gimple_assign_rhs1 (def_stmt);
3541 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3542 || (TYPE_PRECISION (TREE_TYPE (v))
3543 < TYPE_PRECISION (TREE_TYPE (t))))
3544 return;
3545 if (!linear_step)
3546 linear_step = 1;
3547 v = t;
3548 continue;
3549 default:
3550 return;
3551 }
8e4284d0 3552 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
17b658af
JJ
3553 && loop->simduid
3554 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3555 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3556 == loop->simduid))
3557 {
3558 if (!linear_step)
3559 linear_step = 1;
3560 arginfo->linear_step = linear_step;
3561 arginfo->op = base;
3562 arginfo->simd_lane_linear = true;
3563 return;
3564 }
3565 }
3566}
3567
cf1b2ba4
RS
3568/* Return the number of elements in vector type VECTYPE, which is associated
3569 with a SIMD clone. At present these vectors always have a constant
3570 length. */
3571
3572static unsigned HOST_WIDE_INT
3573simd_clone_subparts (tree vectype)
3574{
928686b1 3575 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
cf1b2ba4
RS
3576}
3577
0136f8f0
AH
3578/* Function vectorizable_simd_clone_call.
3579
3580 Check if STMT performs a function call that can be vectorized
3581 by calling a simd clone of the function.
3582 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3583 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3584 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3585
3586static bool
355fe088
TS
3587vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3588 gimple **vec_stmt, slp_tree slp_node)
0136f8f0
AH
3589{
3590 tree vec_dest;
3591 tree scalar_dest;
3592 tree op, type;
3593 tree vec_oprnd0 = NULL_TREE;
3594 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3595 tree vectype;
3596 unsigned int nunits;
3597 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3598 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3599 vec_info *vinfo = stmt_info->vinfo;
0136f8f0 3600 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
81c40241 3601 tree fndecl, new_temp;
355fe088
TS
3602 gimple *def_stmt;
3603 gimple *new_stmt = NULL;
0136f8f0 3604 int ncopies, j;
00426f9a 3605 auto_vec<simd_call_arg_info> arginfo;
0136f8f0
AH
3606 vec<tree> vargs = vNULL;
3607 size_t i, nargs;
3608 tree lhs, rtype, ratype;
e7a74006 3609 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
0136f8f0
AH
3610
3611 /* Is STMT a vectorizable call? */
3612 if (!is_gimple_call (stmt))
3613 return false;
3614
3615 fndecl = gimple_call_fndecl (stmt);
3616 if (fndecl == NULL_TREE)
3617 return false;
3618
d52f5295 3619 struct cgraph_node *node = cgraph_node::get (fndecl);
0136f8f0
AH
3620 if (node == NULL || node->simd_clones == NULL)
3621 return false;
3622
3623 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3624 return false;
3625
66c16fd9
RB
3626 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3627 && ! vec_stmt)
0136f8f0
AH
3628 return false;
3629
3630 if (gimple_call_lhs (stmt)
3631 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3632 return false;
3633
3634 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3635
3636 vectype = STMT_VINFO_VECTYPE (stmt_info);
3637
3638 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3639 return false;
3640
3641 /* FORNOW */
fce57248 3642 if (slp_node)
0136f8f0
AH
3643 return false;
3644
3645 /* Process function arguments. */
3646 nargs = gimple_call_num_args (stmt);
3647
3648 /* Bail out if the function has zero arguments. */
3649 if (nargs == 0)
3650 return false;
3651
00426f9a 3652 arginfo.reserve (nargs, true);
0136f8f0
AH
3653
3654 for (i = 0; i < nargs; i++)
3655 {
3656 simd_call_arg_info thisarginfo;
3657 affine_iv iv;
3658
3659 thisarginfo.linear_step = 0;
3660 thisarginfo.align = 0;
3661 thisarginfo.op = NULL_TREE;
17b658af 3662 thisarginfo.simd_lane_linear = false;
0136f8f0
AH
3663
3664 op = gimple_call_arg (stmt, i);
81c40241
RB
3665 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3666 &thisarginfo.vectype)
0136f8f0
AH
3667 || thisarginfo.dt == vect_uninitialized_def)
3668 {
3669 if (dump_enabled_p ())
3670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3671 "use not simple.\n");
0136f8f0
AH
3672 return false;
3673 }
3674
3675 if (thisarginfo.dt == vect_constant_def
3676 || thisarginfo.dt == vect_external_def)
3677 gcc_assert (thisarginfo.vectype == NULL_TREE);
3678 else
3679 gcc_assert (thisarginfo.vectype != NULL_TREE);
3680
6c9e85fb
JJ
3681 /* For linear arguments, the analyze phase should have saved
3682 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
17b658af
JJ
3683 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3684 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
6c9e85fb
JJ
3685 {
3686 gcc_assert (vec_stmt);
3687 thisarginfo.linear_step
17b658af 3688 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
6c9e85fb 3689 thisarginfo.op
17b658af
JJ
3690 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3691 thisarginfo.simd_lane_linear
3692 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3693 == boolean_true_node);
6c9e85fb
JJ
3694 /* If loop has been peeled for alignment, we need to adjust it. */
3695 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3696 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
17b658af 3697 if (n1 != n2 && !thisarginfo.simd_lane_linear)
6c9e85fb
JJ
3698 {
3699 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
17b658af 3700 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
6c9e85fb
JJ
3701 tree opt = TREE_TYPE (thisarginfo.op);
3702 bias = fold_convert (TREE_TYPE (step), bias);
3703 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3704 thisarginfo.op
3705 = fold_build2 (POINTER_TYPE_P (opt)
3706 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3707 thisarginfo.op, bias);
3708 }
3709 }
3710 else if (!vec_stmt
3711 && thisarginfo.dt != vect_constant_def
3712 && thisarginfo.dt != vect_external_def
3713 && loop_vinfo
3714 && TREE_CODE (op) == SSA_NAME
3715 && simple_iv (loop, loop_containing_stmt (stmt), op,
3716 &iv, false)
3717 && tree_fits_shwi_p (iv.step))
0136f8f0
AH
3718 {
3719 thisarginfo.linear_step = tree_to_shwi (iv.step);
3720 thisarginfo.op = iv.base;
3721 }
3722 else if ((thisarginfo.dt == vect_constant_def
3723 || thisarginfo.dt == vect_external_def)
3724 && POINTER_TYPE_P (TREE_TYPE (op)))
3725 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
17b658af
JJ
3726 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3727 linear too. */
3728 if (POINTER_TYPE_P (TREE_TYPE (op))
3729 && !thisarginfo.linear_step
3730 && !vec_stmt
3731 && thisarginfo.dt != vect_constant_def
3732 && thisarginfo.dt != vect_external_def
3733 && loop_vinfo
3734 && !slp_node
3735 && TREE_CODE (op) == SSA_NAME)
3736 vect_simd_lane_linear (op, loop, &thisarginfo);
0136f8f0
AH
3737
3738 arginfo.quick_push (thisarginfo);
3739 }
3740
d9f21f6a
RS
3741 unsigned HOST_WIDE_INT vf;
3742 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3743 {
3744 if (dump_enabled_p ())
3745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3746 "not considering SIMD clones; not yet supported"
3747 " for variable-width vectors.\n");
 3748      return false;
3749 }
3750
0136f8f0
AH
3751 unsigned int badness = 0;
3752 struct cgraph_node *bestn = NULL;
6c9e85fb
JJ
3753 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3754 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
0136f8f0
AH
3755 else
3756 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3757 n = n->simdclone->next_clone)
3758 {
3759 unsigned int this_badness = 0;
d9f21f6a 3760 if (n->simdclone->simdlen > vf
0136f8f0
AH
3761 || n->simdclone->nargs != nargs)
3762 continue;
d9f21f6a
RS
3763 if (n->simdclone->simdlen < vf)
3764 this_badness += (exact_log2 (vf)
0136f8f0
AH
3765 - exact_log2 (n->simdclone->simdlen)) * 1024;
3766 if (n->simdclone->inbranch)
3767 this_badness += 2048;
3768 int target_badness = targetm.simd_clone.usable (n);
3769 if (target_badness < 0)
3770 continue;
3771 this_badness += target_badness * 512;
3772 /* FORNOW: Have to add code to add the mask argument. */
3773 if (n->simdclone->inbranch)
3774 continue;
3775 for (i = 0; i < nargs; i++)
3776 {
3777 switch (n->simdclone->args[i].arg_type)
3778 {
3779 case SIMD_CLONE_ARG_TYPE_VECTOR:
3780 if (!useless_type_conversion_p
3781 (n->simdclone->args[i].orig_type,
3782 TREE_TYPE (gimple_call_arg (stmt, i))))
3783 i = -1;
3784 else if (arginfo[i].dt == vect_constant_def
3785 || arginfo[i].dt == vect_external_def
3786 || arginfo[i].linear_step)
3787 this_badness += 64;
3788 break;
3789 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3790 if (arginfo[i].dt != vect_constant_def
3791 && arginfo[i].dt != vect_external_def)
3792 i = -1;
3793 break;
3794 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
d9a6bd32 3795 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3796 if (arginfo[i].dt == vect_constant_def
3797 || arginfo[i].dt == vect_external_def
3798 || (arginfo[i].linear_step
3799 != n->simdclone->args[i].linear_step))
3800 i = -1;
3801 break;
3802 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
d9a6bd32
JJ
3803 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3804 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
e01d41e5
JJ
3805 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3806 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3807 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
3808 /* FORNOW */
3809 i = -1;
3810 break;
3811 case SIMD_CLONE_ARG_TYPE_MASK:
3812 gcc_unreachable ();
3813 }
3814 if (i == (size_t) -1)
3815 break;
3816 if (n->simdclone->args[i].alignment > arginfo[i].align)
3817 {
3818 i = -1;
3819 break;
3820 }
3821 if (arginfo[i].align)
3822 this_badness += (exact_log2 (arginfo[i].align)
3823 - exact_log2 (n->simdclone->args[i].alignment));
3824 }
3825 if (i == (size_t) -1)
3826 continue;
3827 if (bestn == NULL || this_badness < badness)
3828 {
3829 bestn = n;
3830 badness = this_badness;
3831 }
3832 }
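
/* For illustration of the scoring above: with vf == 8, a usable
   not-inbranch clone of simdlen 8 scores 0, while a simdlen-4 clone
   scores (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024 since it
   needs twice as many calls per iteration; the candidate with the
   lowest badness wins.  */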
3833
3834 if (bestn == NULL)
00426f9a 3835 return false;
0136f8f0
AH
3836
3837 for (i = 0; i < nargs; i++)
3838 if ((arginfo[i].dt == vect_constant_def
3839 || arginfo[i].dt == vect_external_def)
3840 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3841 {
3842 arginfo[i].vectype
3843 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3844 i)));
3845 if (arginfo[i].vectype == NULL
cf1b2ba4 3846 || (simd_clone_subparts (arginfo[i].vectype)
0136f8f0 3847 > bestn->simdclone->simdlen))
00426f9a 3848 return false;
0136f8f0
AH
3849 }
3850
3851 fndecl = bestn->decl;
3852 nunits = bestn->simdclone->simdlen;
d9f21f6a 3853 ncopies = vf / nunits;
0136f8f0
AH
3854
 3855	  /* If the function isn't const, only allow it in simd loops where the user
3856 has asserted that at least nunits consecutive iterations can be
3857 performed using SIMD instructions. */
3858 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3859 && gimple_vuse (stmt))
00426f9a 3860 return false;
0136f8f0
AH
3861
3862 /* Sanity check: make sure that at least one copy of the vectorized stmt
3863 needs to be generated. */
3864 gcc_assert (ncopies >= 1);
3865
3866 if (!vec_stmt) /* transformation not required. */
3867 {
6c9e85fb
JJ
3868 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3869 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3870 if ((bestn->simdclone->args[i].arg_type
3871 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3872 || (bestn->simdclone->args[i].arg_type
3873 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3874 {
17b658af 3875 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3876 + 1);
3877 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3878 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3879 ? size_type_node : TREE_TYPE (arginfo[i].op);
3880 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3881 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3882 tree sll = arginfo[i].simd_lane_linear
3883 ? boolean_true_node : boolean_false_node;
3884 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3885 }
0136f8f0
AH
3886 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3887 if (dump_enabled_p ())
3888 dump_printf_loc (MSG_NOTE, vect_location,
3889 "=== vectorizable_simd_clone_call ===\n");
3890/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3891 return true;
3892 }
3893
67b8dbac 3894 /* Transform. */
0136f8f0
AH
3895
3896 if (dump_enabled_p ())
3897 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3898
3899 /* Handle def. */
3900 scalar_dest = gimple_call_lhs (stmt);
3901 vec_dest = NULL_TREE;
3902 rtype = NULL_TREE;
3903 ratype = NULL_TREE;
3904 if (scalar_dest)
3905 {
3906 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3907 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3908 if (TREE_CODE (rtype) == ARRAY_TYPE)
3909 {
3910 ratype = rtype;
3911 rtype = TREE_TYPE (ratype);
3912 }
3913 }
3914
3915 prev_stmt_info = NULL;
3916 for (j = 0; j < ncopies; ++j)
3917 {
3918 /* Build argument list for the vectorized call. */
3919 if (j == 0)
3920 vargs.create (nargs);
3921 else
3922 vargs.truncate (0);
3923
3924 for (i = 0; i < nargs; i++)
3925 {
3926 unsigned int k, l, m, o;
3927 tree atype;
3928 op = gimple_call_arg (stmt, i);
3929 switch (bestn->simdclone->args[i].arg_type)
3930 {
3931 case SIMD_CLONE_ARG_TYPE_VECTOR:
3932 atype = bestn->simdclone->args[i].vector_type;
cf1b2ba4 3933 o = nunits / simd_clone_subparts (atype);
0136f8f0
AH
3934 for (m = j * o; m < (j + 1) * o; m++)
3935 {
cf1b2ba4
RS
3936 if (simd_clone_subparts (atype)
3937 < simd_clone_subparts (arginfo[i].vectype))
0136f8f0 3938 {
73a699ae 3939 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
cf1b2ba4
RS
3940 k = (simd_clone_subparts (arginfo[i].vectype)
3941 / simd_clone_subparts (atype));
0136f8f0
AH
3942 gcc_assert ((k & (k - 1)) == 0);
3943 if (m == 0)
3944 vec_oprnd0
81c40241 3945 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3946 else
3947 {
3948 vec_oprnd0 = arginfo[i].op;
3949 if ((m & (k - 1)) == 0)
3950 vec_oprnd0
3951 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3952 vec_oprnd0);
3953 }
3954 arginfo[i].op = vec_oprnd0;
3955 vec_oprnd0
3956 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
92e29a5e 3957 bitsize_int (prec),
0136f8f0
AH
3958 bitsize_int ((m & (k - 1)) * prec));
3959 new_stmt
b731b390 3960 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3961 vec_oprnd0);
3962 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3963 vargs.safe_push (gimple_assign_lhs (new_stmt));
3964 }
3965 else
3966 {
cf1b2ba4
RS
3967 k = (simd_clone_subparts (atype)
3968 / simd_clone_subparts (arginfo[i].vectype));
0136f8f0
AH
3969 gcc_assert ((k & (k - 1)) == 0);
3970 vec<constructor_elt, va_gc> *ctor_elts;
3971 if (k != 1)
3972 vec_alloc (ctor_elts, k);
3973 else
3974 ctor_elts = NULL;
3975 for (l = 0; l < k; l++)
3976 {
3977 if (m == 0 && l == 0)
3978 vec_oprnd0
81c40241 3979 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3980 else
3981 vec_oprnd0
3982 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3983 arginfo[i].op);
3984 arginfo[i].op = vec_oprnd0;
3985 if (k == 1)
3986 break;
3987 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3988 vec_oprnd0);
3989 }
3990 if (k == 1)
3991 vargs.safe_push (vec_oprnd0);
3992 else
3993 {
3994 vec_oprnd0 = build_constructor (atype, ctor_elts);
3995 new_stmt
b731b390 3996 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3997 vec_oprnd0);
3998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3999 vargs.safe_push (gimple_assign_lhs (new_stmt));
4000 }
4001 }
4002 }
4003 break;
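
/* For illustration of the VECTOR case above: if the chosen clone
   takes V4SImode arguments but the loop works on V8SImode vectors,
   k == 2 and each loop vector is split into two BIT_FIELD_REF
   halves; in the opposite situation several narrower defs are
   glued into one clone argument with a CONSTRUCTOR.  */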
4004 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4005 vargs.safe_push (op);
4006 break;
4007 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 4008 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
4009 if (j == 0)
4010 {
4011 gimple_seq stmts;
4012 arginfo[i].op
4013 = force_gimple_operand (arginfo[i].op, &stmts, true,
4014 NULL_TREE);
4015 if (stmts != NULL)
4016 {
4017 basic_block new_bb;
4018 edge pe = loop_preheader_edge (loop);
4019 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4020 gcc_assert (!new_bb);
4021 }
17b658af
JJ
4022 if (arginfo[i].simd_lane_linear)
4023 {
4024 vargs.safe_push (arginfo[i].op);
4025 break;
4026 }
b731b390 4027 tree phi_res = copy_ssa_name (op);
538dd0b7 4028 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 4029 set_vinfo_for_stmt (new_phi,
310213d4 4030 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
4031 add_phi_arg (new_phi, arginfo[i].op,
4032 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4033 enum tree_code code
4034 = POINTER_TYPE_P (TREE_TYPE (op))
4035 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4036 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4037 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4038 widest_int cst
4039 = wi::mul (bestn->simdclone->args[i].linear_step,
4040 ncopies * nunits);
4041 tree tcst = wide_int_to_tree (type, cst);
b731b390 4042 tree phi_arg = copy_ssa_name (op);
0d0e4a03
JJ
4043 new_stmt
4044 = gimple_build_assign (phi_arg, code, phi_res, tcst);
0136f8f0
AH
4045 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4046 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4047 set_vinfo_for_stmt (new_stmt,
310213d4 4048 new_stmt_vec_info (new_stmt, loop_vinfo));
0136f8f0
AH
4049 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4050 UNKNOWN_LOCATION);
4051 arginfo[i].op = phi_res;
4052 vargs.safe_push (phi_res);
4053 }
4054 else
4055 {
4056 enum tree_code code
4057 = POINTER_TYPE_P (TREE_TYPE (op))
4058 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4059 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4060 ? sizetype : TREE_TYPE (op);
807e902e
KZ
4061 widest_int cst
4062 = wi::mul (bestn->simdclone->args[i].linear_step,
4063 j * nunits);
4064 tree tcst = wide_int_to_tree (type, cst);
b731b390 4065 new_temp = make_ssa_name (TREE_TYPE (op));
0d0e4a03
JJ
4066 new_stmt = gimple_build_assign (new_temp, code,
4067 arginfo[i].op, tcst);
0136f8f0
AH
4068 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4069 vargs.safe_push (new_temp);
4070 }
4071 break;
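
/* For illustration of the linear-step handling above: with step 1,
   simdlen 4 and ncopies 2, the j == 0 path creates a loop-header
   phi that advances by 1 * 2 * 4 == 8 per iteration, and the
   j == 1 copy passes the phi result plus 1 * 1 * 4 == 4 to the
   second clone call.  */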
7adb26f2
JJ
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4073 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 4074 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
e01d41e5
JJ
4075 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4076 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4077 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
0136f8f0
AH
4078 default:
4079 gcc_unreachable ();
4080 }
4081 }
4082
4083 new_stmt = gimple_build_call_vec (fndecl, vargs);
4084 if (vec_dest)
4085 {
cf1b2ba4 4086 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
0136f8f0 4087 if (ratype)
b731b390 4088 new_temp = create_tmp_var (ratype);
cf1b2ba4
RS
4089 else if (simd_clone_subparts (vectype)
4090 == simd_clone_subparts (rtype))
0136f8f0
AH
4091 new_temp = make_ssa_name (vec_dest, new_stmt);
4092 else
4093 new_temp = make_ssa_name (rtype, new_stmt);
4094 gimple_call_set_lhs (new_stmt, new_temp);
4095 }
4096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4097
4098 if (vec_dest)
4099 {
cf1b2ba4 4100 if (simd_clone_subparts (vectype) < nunits)
0136f8f0
AH
4101 {
4102 unsigned int k, l;
73a699ae
RS
4103 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4104 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
cf1b2ba4 4105 k = nunits / simd_clone_subparts (vectype);
0136f8f0
AH
4106 gcc_assert ((k & (k - 1)) == 0);
4107 for (l = 0; l < k; l++)
4108 {
4109 tree t;
4110 if (ratype)
4111 {
4112 t = build_fold_addr_expr (new_temp);
4113 t = build2 (MEM_REF, vectype, t,
73a699ae 4114 build_int_cst (TREE_TYPE (t), l * bytes));
0136f8f0
AH
4115 }
4116 else
4117 t = build3 (BIT_FIELD_REF, vectype, new_temp,
92e29a5e 4118 bitsize_int (prec), bitsize_int (l * prec));
0136f8f0 4119 new_stmt
b731b390 4120 = gimple_build_assign (make_ssa_name (vectype), t);
0136f8f0
AH
4121 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4122 if (j == 0 && l == 0)
4123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4124 else
4125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126
4127 prev_stmt_info = vinfo_for_stmt (new_stmt);
4128 }
4129
4130 if (ratype)
4131 {
4132 tree clobber = build_constructor (ratype, NULL);
4133 TREE_THIS_VOLATILE (clobber) = 1;
4134 new_stmt = gimple_build_assign (new_temp, clobber);
4135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4136 }
4137 continue;
4138 }
cf1b2ba4 4139 else if (simd_clone_subparts (vectype) > nunits)
0136f8f0 4140 {
cf1b2ba4
RS
4141 unsigned int k = (simd_clone_subparts (vectype)
4142 / simd_clone_subparts (rtype));
0136f8f0
AH
4143 gcc_assert ((k & (k - 1)) == 0);
4144 if ((j & (k - 1)) == 0)
4145 vec_alloc (ret_ctor_elts, k);
4146 if (ratype)
4147 {
cf1b2ba4 4148 unsigned int m, o = nunits / simd_clone_subparts (rtype);
0136f8f0
AH
4149 for (m = 0; m < o; m++)
4150 {
4151 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4152 size_int (m), NULL_TREE, NULL_TREE);
4153 new_stmt
b731b390 4154 = gimple_build_assign (make_ssa_name (rtype), tem);
0136f8f0
AH
4155 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4156 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4157 gimple_assign_lhs (new_stmt));
4158 }
4159 tree clobber = build_constructor (ratype, NULL);
4160 TREE_THIS_VOLATILE (clobber) = 1;
4161 new_stmt = gimple_build_assign (new_temp, clobber);
4162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4163 }
4164 else
4165 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4166 if ((j & (k - 1)) != k - 1)
4167 continue;
4168 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4169 new_stmt
b731b390 4170 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
0136f8f0
AH
4171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4172
4173 if ((unsigned) j == k - 1)
4174 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4175 else
4176 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4177
4178 prev_stmt_info = vinfo_for_stmt (new_stmt);
4179 continue;
4180 }
4181 else if (ratype)
4182 {
4183 tree t = build_fold_addr_expr (new_temp);
4184 t = build2 (MEM_REF, vectype, t,
4185 build_int_cst (TREE_TYPE (t), 0));
4186 new_stmt
b731b390 4187 = gimple_build_assign (make_ssa_name (vec_dest), t);
0136f8f0
AH
4188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189 tree clobber = build_constructor (ratype, NULL);
4190 TREE_THIS_VOLATILE (clobber) = 1;
4191 vect_finish_stmt_generation (stmt,
4192 gimple_build_assign (new_temp,
4193 clobber), gsi);
4194 }
4195 }
4196
4197 if (j == 0)
4198 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4199 else
4200 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4201
4202 prev_stmt_info = vinfo_for_stmt (new_stmt);
4203 }
4204
4205 vargs.release ();
4206
 4207	  /* The call in STMT might prevent it from being removed in DCE.
 4208	     We cannot remove it here, however, because of the way the SSA name
 4209	     it defines is mapped to the new definition.  So just replace
 4210	     the rhs of the statement with something harmless.  */
4211
4212 if (slp_node)
4213 return true;
4214
4215 if (scalar_dest)
4216 {
4217 type = TREE_TYPE (scalar_dest);
4218 if (is_pattern_stmt_p (stmt_info))
4219 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4220 else
4221 lhs = gimple_call_lhs (stmt);
4222 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4223 }
4224 else
4225 new_stmt = gimple_build_nop ();
4226 set_vinfo_for_stmt (new_stmt, stmt_info);
4227 set_vinfo_for_stmt (stmt, NULL);
4228 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 4229 gsi_replace (gsi, new_stmt, true);
0136f8f0
AH
4230 unlink_stmt_vdef (stmt);
4231
4232 return true;
4233}
4234
4235
ebfd146a
IR
4236/* Function vect_gen_widened_results_half
4237
 4238   Create a vector stmt whose code is CODE, whose operand type is OP_TYPE,
b8698a0f 4239   and whose result variable is VEC_DEST; its arguments are
ff802fa1 4240   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
ebfd146a
IR
4241 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4242 needs to be created (DECL is a function-decl of a target-builtin).
4243 STMT is the original scalar stmt that we are vectorizing. */
4244
355fe088 4245static gimple *
ebfd146a
IR
4246vect_gen_widened_results_half (enum tree_code code,
4247 tree decl,
4248 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4249 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 4250 gimple *stmt)
b8698a0f 4251{
355fe088 4252 gimple *new_stmt;
b8698a0f
L
4253 tree new_temp;
4254
4255 /* Generate half of the widened result: */
4256 if (code == CALL_EXPR)
4257 {
4258 /* Target specific support */
ebfd146a
IR
4259 if (op_type == binary_op)
4260 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4261 else
4262 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4263 new_temp = make_ssa_name (vec_dest, new_stmt);
4264 gimple_call_set_lhs (new_stmt, new_temp);
b8698a0f
L
4265 }
4266 else
ebfd146a 4267 {
b8698a0f
L
4268 /* Generic support */
4269 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
4270 if (op_type != binary_op)
4271 vec_oprnd1 = NULL;
0d0e4a03 4272 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
ebfd146a
IR
4273 new_temp = make_ssa_name (vec_dest, new_stmt);
4274 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 4275 }
ebfd146a
IR
4276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4277
ebfd146a
IR
4278 return new_stmt;
4279}
4280
4a00c761
JJ
4281
4282/* Get vectorized definitions for loop-based vectorization. For the first
4283 operand we call vect_get_vec_def_for_operand() (with OPRND containing
 4284   the scalar operand), and for the rest we get a copy with
4285 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4286 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4287 The vectors are collected into VEC_OPRNDS. */
4288
4289static void
355fe088 4290vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 4291 vec<tree> *vec_oprnds, int multi_step_cvt)
4a00c761
JJ
4292{
4293 tree vec_oprnd;
4294
4295 /* Get first vector operand. */
4296 /* All the vector operands except the very first one (that is scalar oprnd)
4297 are stmt copies. */
4298 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 4299 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4a00c761
JJ
4300 else
4301 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4302
9771b263 4303 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4304
4305 /* Get second vector operand. */
4306 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 4307 vec_oprnds->quick_push (vec_oprnd);
4a00c761
JJ
4308
4309 *oprnd = vec_oprnd;
4310
4311 /* For conversion in multiple steps, continue to get operands
4312 recursively. */
4313 if (multi_step_cvt)
4314 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4315}
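
/* For illustration of vect_get_loop_based_defs: with
   MULTI_STEP_CVT == 1 it collects four vector defs in VEC_OPRNDS,
   the first from the scalar operand and the rest as successive stmt
   copies, which is exactly what a two-step demotion consumes.  */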
4316
4317
4318/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4319 For multi-step conversions store the resulting vectors and call the function
4320 recursively. */
4321
4322static void
9771b263 4323vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 4324 int multi_step_cvt, gimple *stmt,
9771b263 4325 vec<tree> vec_dsts,
4a00c761
JJ
4326 gimple_stmt_iterator *gsi,
4327 slp_tree slp_node, enum tree_code code,
4328 stmt_vec_info *prev_stmt_info)
4329{
4330 unsigned int i;
4331 tree vop0, vop1, new_tmp, vec_dest;
355fe088 4332 gimple *new_stmt;
4a00c761
JJ
4333 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4334
9771b263 4335 vec_dest = vec_dsts.pop ();
4a00c761 4336
9771b263 4337 for (i = 0; i < vec_oprnds->length (); i += 2)
4a00c761
JJ
4338 {
 4339      /* Create the demotion operation.  */
9771b263
DN
4340 vop0 = (*vec_oprnds)[i];
4341 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 4342 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4a00c761
JJ
4343 new_tmp = make_ssa_name (vec_dest, new_stmt);
4344 gimple_assign_set_lhs (new_stmt, new_tmp);
4345 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4346
4347 if (multi_step_cvt)
4348 /* Store the resulting vector for next recursive call. */
9771b263 4349 (*vec_oprnds)[i/2] = new_tmp;
4a00c761
JJ
4350 else
4351 {
4352 /* This is the last step of the conversion sequence. Store the
 4353	     vectors in SLP_NODE or in the vector info of the scalar statement
 4354	     (or in the STMT_VINFO_RELATED_STMT chain).  */
4355 if (slp_node)
9771b263 4356 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4357 else
c689ce1e
RB
4358 {
4359 if (!*prev_stmt_info)
4360 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4361 else
4362 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 4363
c689ce1e
RB
4364 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4365 }
4a00c761
JJ
4366 }
4367 }
4368
4369 /* For multi-step demotion operations we first generate demotion operations
4370 from the source type to the intermediate types, and then combine the
 4371     results (stored in VEC_OPRNDS) with a demotion operation to the destination
4372 type. */
4373 if (multi_step_cvt)
4374 {
4375 /* At each level of recursion we have half of the operands we had at the
4376 previous level. */
9771b263 4377 vec_oprnds->truncate ((i+1)/2);
4a00c761
JJ
4378 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4379 stmt, vec_dsts, gsi, slp_node,
4380 VEC_PACK_TRUNC_EXPR,
4381 prev_stmt_info);
4382 }
4383
9771b263 4384 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4385}
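
/* For illustration of the recursion above (MULTI_STEP_CVT == 1):
   four V4SI defs are first packed pairwise into two V8HI vectors,
   and the recursive call then packs those into the final V16QI
   vector using VEC_PACK_TRUNC_EXPR.  */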
4386
4387
4388/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4389 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4390 the resulting vectors and call the function recursively. */
4391
4392static void
9771b263
DN
4393vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4394 vec<tree> *vec_oprnds1,
355fe088 4395 gimple *stmt, tree vec_dest,
4a00c761
JJ
4396 gimple_stmt_iterator *gsi,
4397 enum tree_code code1,
4398 enum tree_code code2, tree decl1,
4399 tree decl2, int op_type)
4400{
4401 int i;
4402 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 4403 gimple *new_stmt1, *new_stmt2;
6e1aa848 4404 vec<tree> vec_tmp = vNULL;
4a00c761 4405
9771b263
DN
4406 vec_tmp.create (vec_oprnds0->length () * 2);
4407 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4a00c761
JJ
4408 {
4409 if (op_type == binary_op)
9771b263 4410 vop1 = (*vec_oprnds1)[i];
4a00c761
JJ
4411 else
4412 vop1 = NULL_TREE;
4413
 4414      /* Generate the two halves of the promotion operation.  */
4415 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4416 op_type, vec_dest, gsi, stmt);
4417 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4418 op_type, vec_dest, gsi, stmt);
4419 if (is_gimple_call (new_stmt1))
4420 {
4421 new_tmp1 = gimple_call_lhs (new_stmt1);
4422 new_tmp2 = gimple_call_lhs (new_stmt2);
4423 }
4424 else
4425 {
4426 new_tmp1 = gimple_assign_lhs (new_stmt1);
4427 new_tmp2 = gimple_assign_lhs (new_stmt2);
4428 }
4429
4430 /* Store the results for the next step. */
9771b263
DN
4431 vec_tmp.quick_push (new_tmp1);
4432 vec_tmp.quick_push (new_tmp2);
4a00c761
JJ
4433 }
4434
689eaba3 4435 vec_oprnds0->release ();
4a00c761
JJ
4436 *vec_oprnds0 = vec_tmp;
4437}
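
/* For illustration: a widening conversion from V8HI yields two
   result vectors per input vector, e.g. VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR producing the V4SI low and high halves, so
   VEC_OPRNDS0 doubles in length on each invocation.  */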
4438
4439
b8698a0f
L
 4440/* Check if STMT performs a conversion operation that can be vectorized.
4441 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 4442 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4443 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4444
4445static bool
355fe088
TS
4446vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4447 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
4448{
4449 tree vec_dest;
4450 tree scalar_dest;
4a00c761 4451 tree op0, op1 = NULL_TREE;
ebfd146a
IR
4452 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4453 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4454 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4455 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 4456 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
ebfd146a
IR
4457 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4458 tree new_temp;
355fe088 4459 gimple *def_stmt;
ebfd146a 4460 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 4461 int ndts = 2;
355fe088 4462 gimple *new_stmt = NULL;
ebfd146a 4463 stmt_vec_info prev_stmt_info;
062d5ccc
RS
4464 poly_uint64 nunits_in;
4465 poly_uint64 nunits_out;
ebfd146a 4466 tree vectype_out, vectype_in;
4a00c761
JJ
4467 int ncopies, i, j;
4468 tree lhs_type, rhs_type;
ebfd146a 4469 enum { NARROW, NONE, WIDEN } modifier;
6e1aa848
DN
4470 vec<tree> vec_oprnds0 = vNULL;
4471 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 4472 tree vop0;
4a00c761 4473 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4474 vec_info *vinfo = stmt_info->vinfo;
4a00c761 4475 int multi_step_cvt = 0;
6e1aa848 4476 vec<tree> interm_types = vNULL;
4a00c761
JJ
4477 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4478 int op_type;
4a00c761 4479 unsigned short fltsz;
ebfd146a
IR
4480
4481 /* Is STMT a vectorizable conversion? */
4482
4a00c761 4483 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
4484 return false;
4485
66c16fd9
RB
4486 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4487 && ! vec_stmt)
ebfd146a
IR
4488 return false;
4489
4490 if (!is_gimple_assign (stmt))
4491 return false;
4492
4493 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4494 return false;
4495
4496 code = gimple_assign_rhs_code (stmt);
4a00c761
JJ
4497 if (!CONVERT_EXPR_CODE_P (code)
4498 && code != FIX_TRUNC_EXPR
4499 && code != FLOAT_EXPR
4500 && code != WIDEN_MULT_EXPR
4501 && code != WIDEN_LSHIFT_EXPR)
ebfd146a
IR
4502 return false;
4503
4a00c761
JJ
4504 op_type = TREE_CODE_LENGTH (code);
4505
ebfd146a 4506 /* Check types of lhs and rhs. */
b690cc0f 4507 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 4508 lhs_type = TREE_TYPE (scalar_dest);
b690cc0f
RG
4509 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4510
ebfd146a
IR
4511 op0 = gimple_assign_rhs1 (stmt);
4512 rhs_type = TREE_TYPE (op0);
4a00c761
JJ
4513
4514 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4515 && !((INTEGRAL_TYPE_P (lhs_type)
4516 && INTEGRAL_TYPE_P (rhs_type))
4517 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4518 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4519 return false;
4520
e6f5c25d
IE
4521 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4522 && ((INTEGRAL_TYPE_P (lhs_type)
2be65d9e 4523 && !type_has_mode_precision_p (lhs_type))
e6f5c25d 4524 || (INTEGRAL_TYPE_P (rhs_type)
2be65d9e 4525 && !type_has_mode_precision_p (rhs_type))))
4a00c761 4526 {
73fbfcad 4527 if (dump_enabled_p ())
78c60e3d 4528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
4529 "type conversion to/from bit-precision unsupported."
4530 "\n");
4a00c761
JJ
4531 return false;
4532 }
4533
b690cc0f 4534 /* Check the operands of the operation. */
81c40241 4535 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 4536 {
73fbfcad 4537 if (dump_enabled_p ())
78c60e3d 4538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4539 "use not simple.\n");
b690cc0f
RG
4540 return false;
4541 }
4a00c761
JJ
4542 if (op_type == binary_op)
4543 {
4544 bool ok;
4545
4546 op1 = gimple_assign_rhs2 (stmt);
4547 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4548 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4549 OP1. */
4550 if (CONSTANT_CLASS_P (op0))
81c40241 4551 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 4552 else
81c40241 4553 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4a00c761
JJ
4554
4555 if (!ok)
4556 {
73fbfcad 4557 if (dump_enabled_p ())
78c60e3d 4558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4559 "use not simple.\n");
4a00c761
JJ
4560 return false;
4561 }
4562 }
4563
b690cc0f
RG
4564 /* If op0 is an external or constant defs use a vector type of
4565 the same size as the output vector type. */
ebfd146a 4566 if (!vectype_in)
b690cc0f 4567 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
4568 if (vec_stmt)
4569 gcc_assert (vectype_in);
4570 if (!vectype_in)
4571 {
73fbfcad 4572 if (dump_enabled_p ())
4a00c761 4573 {
78c60e3d
SS
4574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4575 "no vectype for scalar type ");
4576 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 4577 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 4578 }
7d8930a0
IR
4579
4580 return false;
4581 }
ebfd146a 4582
e6f5c25d
IE
4583 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4584 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4585 {
4586 if (dump_enabled_p ())
4587 {
4588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4589 "can't convert between boolean and non "
4590 "boolean vectors");
4591 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4592 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4593 }
4594
4595 return false;
4596 }
4597
b690cc0f
RG
4598 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4599 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
062d5ccc 4600 if (known_eq (nunits_out, nunits_in))
ebfd146a 4601 modifier = NONE;
062d5ccc
RS
4602 else if (multiple_p (nunits_out, nunits_in))
4603 modifier = NARROW;
ebfd146a 4604 else
062d5ccc
RS
4605 {
4606 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4607 modifier = WIDEN;
4608 }
ebfd146a 4609
ff802fa1
IR
4610 /* Multiple types in SLP are handled by creating the appropriate number of
4611 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4612 case of SLP. */
fce57248 4613 if (slp_node)
ebfd146a 4614 ncopies = 1;
4a00c761 4615 else if (modifier == NARROW)
e8f142e2 4616 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4a00c761 4617 else
e8f142e2 4618 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
b8698a0f 4619
ebfd146a
IR
4620 /* Sanity check: make sure that at least one copy of the vectorized stmt
4621 needs to be generated. */
4622 gcc_assert (ncopies >= 1);
4623
16d22000
RS
4624 bool found_mode = false;
4625 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4626 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4627 opt_scalar_mode rhs_mode_iter;
b397965c 4628
ebfd146a 4629 /* Supportable by target? */
4a00c761 4630 switch (modifier)
ebfd146a 4631 {
4a00c761
JJ
4632 case NONE:
4633 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4634 return false;
4635 if (supportable_convert_operation (code, vectype_out, vectype_in,
4636 &decl1, &code1))
4637 break;
4638 /* FALLTHRU */
4639 unsupported:
73fbfcad 4640 if (dump_enabled_p ())
78c60e3d 4641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4642 "conversion not supported by target.\n");
ebfd146a 4643 return false;
ebfd146a 4644
4a00c761
JJ
4645 case WIDEN:
4646 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
a86ec597
RH
4647 &code1, &code2, &multi_step_cvt,
4648 &interm_types))
4a00c761
JJ
4649 {
4650 /* Binary widening operation can only be supported directly by the
4651 architecture. */
4652 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4653 break;
4654 }
4655
4656 if (code != FLOAT_EXPR
b397965c 4657 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4658 goto unsupported;
4659
b397965c 4660 fltsz = GET_MODE_SIZE (lhs_mode);
16d22000 4661 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4a00c761 4662 {
16d22000 4663 rhs_mode = rhs_mode_iter.require ();
c94843d2
RS
4664 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4665 break;
4666
4a00c761
JJ
4667 cvt_type
4668 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4669 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4670 if (cvt_type == NULL_TREE)
4671 goto unsupported;
4672
4673 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4674 {
4675 if (!supportable_convert_operation (code, vectype_out,
4676 cvt_type, &decl1, &codecvt1))
4677 goto unsupported;
4678 }
4679 else if (!supportable_widening_operation (code, stmt, vectype_out,
a86ec597
RH
4680 cvt_type, &codecvt1,
4681 &codecvt2, &multi_step_cvt,
4a00c761
JJ
4682 &interm_types))
4683 continue;
4684 else
4685 gcc_assert (multi_step_cvt == 0);
4686
4687 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
a86ec597
RH
4688 vectype_in, &code1, &code2,
4689 &multi_step_cvt, &interm_types))
16d22000
RS
4690 {
4691 found_mode = true;
4692 break;
4693 }
4a00c761
JJ
4694 }
4695
16d22000 4696 if (!found_mode)
4a00c761
JJ
4697 goto unsupported;
4698
4699 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4700 codecvt2 = ERROR_MARK;
4701 else
4702 {
4703 multi_step_cvt++;
9771b263 4704 interm_types.safe_push (cvt_type);
4a00c761
JJ
4705 cvt_type = NULL_TREE;
4706 }
4707 break;
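      /* An illustration of the search above, assuming 128-bit vectors:
	 for "double = (double) short", lhs_mode is DFmode and rhs_mode
	 starts at HImode.  The loop settles on SImode: V8HI is widened
	 to two V4SI by a NOP_EXPR, then each V4SI is widened to two
	 V2DF by a FLOAT_EXPR, a two-step conversion.  */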
4708
4709 case NARROW:
4710 gcc_assert (op_type == unary_op);
4711 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4712 &code1, &multi_step_cvt,
4713 &interm_types))
4714 break;
4715
4716 if (code != FIX_TRUNC_EXPR
b397965c 4717 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4a00c761
JJ
4718 goto unsupported;
4719
4a00c761
JJ
4720 cvt_type
4721 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4722 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4723 if (cvt_type == NULL_TREE)
4724 goto unsupported;
4725 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4726 &decl1, &codecvt1))
4727 goto unsupported;
4728 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4729 &code1, &multi_step_cvt,
4730 &interm_types))
4731 break;
4732 goto unsupported;
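      /* An illustration of the fallback above, assuming 128-bit vectors:
	 for "short = (short) double", a direct V2DF -> V8HI
	 FIX_TRUNC_EXPR is unlikely to exist, so we first FIX_TRUNC
	 V2DF to V2DI (cvt_type is built from rhs_mode) and then narrow
	 the integer vectors down to V8HI with NOP_EXPRs.  */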
4733
4734 default:
4735 gcc_unreachable ();
ebfd146a
IR
4736 }
4737
4738 if (!vec_stmt) /* transformation not required. */
4739 {
73fbfcad 4740 if (dump_enabled_p ())
78c60e3d 4741 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4742 "=== vectorizable_conversion ===\n");
4a00c761 4743 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
8bd37302
BS
4744 {
4745 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
78604de0
RB
4746 if (!slp_node)
4747 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
8bd37302 4748 }
4a00c761
JJ
4749 else if (modifier == NARROW)
4750 {
4751 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
78604de0
RB
4752 if (!slp_node)
4753 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761
JJ
4754 }
4755 else
4756 {
4757 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
78604de0
RB
4758 if (!slp_node)
4759 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 4760 }
9771b263 4761 interm_types.release ();
ebfd146a
IR
4762 return true;
4763 }
4764
67b8dbac 4765 /* Transform. */
73fbfcad 4766 if (dump_enabled_p ())
78c60e3d 4767 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4768 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 4769
4a00c761
JJ
4770 if (op_type == binary_op)
4771 {
4772 if (CONSTANT_CLASS_P (op0))
4773 op0 = fold_convert (TREE_TYPE (op1), op0);
4774 else if (CONSTANT_CLASS_P (op1))
4775 op1 = fold_convert (TREE_TYPE (op0), op1);
4776 }
4777
4778 /* In case of multi-step conversion, we first generate conversion operations
 4779 to the intermediate types, and then from those types to the final one.
4780 We create vector destinations for the intermediate type (TYPES) received
4781 from supportable_*_operation, and store them in the correct order
4782 for future use in vect_create_vectorized_*_stmts (). */
8c681247 4783 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
82294ec1
JJ
4784 vec_dest = vect_create_destination_var (scalar_dest,
4785 (cvt_type && modifier == WIDEN)
4786 ? cvt_type : vectype_out);
9771b263 4787 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4788
4789 if (multi_step_cvt)
4790 {
9771b263
DN
4791 for (i = interm_types.length () - 1;
4792 interm_types.iterate (i, &intermediate_type); i--)
4a00c761
JJ
4793 {
4794 vec_dest = vect_create_destination_var (scalar_dest,
4795 intermediate_type);
9771b263 4796 vec_dsts.quick_push (vec_dest);
4a00c761
JJ
4797 }
4798 }
ebfd146a 4799
4a00c761 4800 if (cvt_type)
82294ec1
JJ
4801 vec_dest = vect_create_destination_var (scalar_dest,
4802 modifier == WIDEN
4803 ? vectype_out : cvt_type);
4a00c761
JJ
4804
4805 if (!slp_node)
4806 {
30862efc 4807 if (modifier == WIDEN)
4a00c761 4808 {
c3284718 4809 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 4810 if (op_type == binary_op)
9771b263 4811 vec_oprnds1.create (1);
4a00c761 4812 }
30862efc 4813 else if (modifier == NARROW)
9771b263
DN
4814 vec_oprnds0.create (
4815 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4a00c761
JJ
4816 }
4817 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 4818 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 4819
4a00c761 4820 last_oprnd = op0;
ebfd146a
IR
4821 prev_stmt_info = NULL;
4822 switch (modifier)
4823 {
4824 case NONE:
4825 for (j = 0; j < ncopies; j++)
4826 {
ebfd146a 4827 if (j == 0)
306b0c92 4828 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
4829 else
4830 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4831
9771b263 4832 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4833 {
 4834 /* Arguments are ready.  Create the new vector stmt. */
4835 if (code1 == CALL_EXPR)
4836 {
4837 new_stmt = gimple_build_call (decl1, 1, vop0);
4838 new_temp = make_ssa_name (vec_dest, new_stmt);
4839 gimple_call_set_lhs (new_stmt, new_temp);
4840 }
4841 else
4842 {
4843 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 4844 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4a00c761
JJ
4845 new_temp = make_ssa_name (vec_dest, new_stmt);
4846 gimple_assign_set_lhs (new_stmt, new_temp);
4847 }
4848
4849 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4850 if (slp_node)
9771b263 4851 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
225ce44b
RB
4852 else
4853 {
4854 if (!prev_stmt_info)
4855 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4856 else
4857 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4858 prev_stmt_info = vinfo_for_stmt (new_stmt);
4859 }
4a00c761 4860 }
ebfd146a
IR
4861 }
4862 break;
4863
4864 case WIDEN:
4865 /* In case the vectorization factor (VF) is bigger than the number
4866 of elements that we can fit in a vectype (nunits), we have to
 4867 generate more than one vector stmt, i.e., we need to "unroll"
4868 the vector stmt by a factor VF/nunits. */
4869 for (j = 0; j < ncopies; j++)
4870 {
4a00c761 4871 /* Handle uses. */
ebfd146a 4872 if (j == 0)
4a00c761
JJ
4873 {
4874 if (slp_node)
4875 {
4876 if (code == WIDEN_LSHIFT_EXPR)
4877 {
4878 unsigned int k;
ebfd146a 4879
4a00c761
JJ
4880 vec_oprnd1 = op1;
4881 /* Store vec_oprnd1 for every vector stmt to be created
4882 for SLP_NODE. We check during the analysis that all
4883 the shift arguments are the same. */
4884 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4885 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4886
4887 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4888 slp_node);
4a00c761
JJ
4889 }
4890 else
4891 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
306b0c92 4892 &vec_oprnds1, slp_node);
4a00c761
JJ
4893 }
4894 else
4895 {
81c40241 4896 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4897 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4898 if (op_type == binary_op)
4899 {
4900 if (code == WIDEN_LSHIFT_EXPR)
4901 vec_oprnd1 = op1;
4902 else
81c40241 4903 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4904 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4905 }
4906 }
4907 }
ebfd146a 4908 else
4a00c761
JJ
4909 {
4910 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
9771b263
DN
4911 vec_oprnds0.truncate (0);
4912 vec_oprnds0.quick_push (vec_oprnd0);
4a00c761
JJ
4913 if (op_type == binary_op)
4914 {
4915 if (code == WIDEN_LSHIFT_EXPR)
4916 vec_oprnd1 = op1;
4917 else
4918 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4919 vec_oprnd1);
9771b263
DN
4920 vec_oprnds1.truncate (0);
4921 vec_oprnds1.quick_push (vec_oprnd1);
4a00c761
JJ
4922 }
4923 }
ebfd146a 4924
4a00c761
JJ
4925 /* Arguments are ready. Create the new vector stmts. */
4926 for (i = multi_step_cvt; i >= 0; i--)
4927 {
9771b263 4928 tree this_dest = vec_dsts[i];
4a00c761
JJ
4929 enum tree_code c1 = code1, c2 = code2;
4930 if (i == 0 && codecvt2 != ERROR_MARK)
4931 {
4932 c1 = codecvt1;
4933 c2 = codecvt2;
4934 }
4935 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4936 &vec_oprnds1,
4937 stmt, this_dest, gsi,
4938 c1, c2, decl1, decl2,
4939 op_type);
4940 }
4941
9771b263 4942 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
4943 {
4944 if (cvt_type)
4945 {
4946 if (codecvt1 == CALL_EXPR)
4947 {
4948 new_stmt = gimple_build_call (decl1, 1, vop0);
4949 new_temp = make_ssa_name (vec_dest, new_stmt);
4950 gimple_call_set_lhs (new_stmt, new_temp);
4951 }
4952 else
4953 {
4954 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4955 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4956 new_stmt = gimple_build_assign (new_temp, codecvt1,
4957 vop0);
4a00c761
JJ
4958 }
4959
4960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4961 }
4962 else
4963 new_stmt = SSA_NAME_DEF_STMT (vop0);
4964
4965 if (slp_node)
9771b263 4966 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4967 else
c689ce1e
RB
4968 {
4969 if (!prev_stmt_info)
4970 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4971 else
4972 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4973 prev_stmt_info = vinfo_for_stmt (new_stmt);
4974 }
4a00c761 4975 }
ebfd146a 4976 }
4a00c761
JJ
4977
4978 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
4979 break;
4980
4981 case NARROW:
4982 /* In case the vectorization factor (VF) is bigger than the number
4983 of elements that we can fit in a vectype (nunits), we have to
 4984 generate more than one vector stmt, i.e., we need to "unroll"
4985 the vector stmt by a factor VF/nunits. */
4986 for (j = 0; j < ncopies; j++)
4987 {
4988 /* Handle uses. */
4a00c761
JJ
4989 if (slp_node)
4990 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 4991 slp_node);
ebfd146a
IR
4992 else
4993 {
9771b263 4994 vec_oprnds0.truncate (0);
4a00c761
JJ
4995 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4996 vect_pow2 (multi_step_cvt) - 1);
ebfd146a
IR
4997 }
4998
4a00c761
JJ
4999 /* Arguments are ready. Create the new vector stmts. */
5000 if (cvt_type)
9771b263 5001 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4a00c761
JJ
5002 {
5003 if (codecvt1 == CALL_EXPR)
5004 {
5005 new_stmt = gimple_build_call (decl1, 1, vop0);
5006 new_temp = make_ssa_name (vec_dest, new_stmt);
5007 gimple_call_set_lhs (new_stmt, new_temp);
5008 }
5009 else
5010 {
5011 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 5012 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
5013 new_stmt = gimple_build_assign (new_temp, codecvt1,
5014 vop0);
4a00c761 5015 }
ebfd146a 5016
4a00c761 5017 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 5018 vec_oprnds0[i] = new_temp;
4a00c761 5019 }
ebfd146a 5020
4a00c761
JJ
5021 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5022 stmt, vec_dsts, gsi,
5023 slp_node, code1,
5024 &prev_stmt_info);
ebfd146a
IR
5025 }
5026
5027 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 5028 break;
ebfd146a
IR
5029 }
5030
9771b263
DN
5031 vec_oprnds0.release ();
5032 vec_oprnds1.release ();
9771b263 5033 interm_types.release ();
ebfd146a
IR
5034
5035 return true;
5036}
ff802fa1
IR
5037
5038
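/* A minimal sketch, not part of the original file: a scalar loop of the
   shape vectorizable_conversion handles.  The function name is
   hypothetical; with 128-bit vectors this is the two-step WIDEN
   conversion illustrated above.  */

static void
example_widen_conversion (double *restrict out, const short *restrict in,
			  int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (double) in[i];	/* FLOAT_EXPR, modifier == WIDEN.  */
}
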
ebfd146a
IR
5039/* Function vectorizable_assignment.
5040
b8698a0f
L
5041 Check if STMT performs an assignment (copy) that can be vectorized.
5042 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5043 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5044 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5045
5046static bool
355fe088
TS
5047vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5048 gimple **vec_stmt, slp_tree slp_node)
ebfd146a
IR
5049{
5050 tree vec_dest;
5051 tree scalar_dest;
5052 tree op;
5053 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a
IR
5054 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5055 tree new_temp;
355fe088 5056 gimple *def_stmt;
4fc5ebf1
JG
5057 enum vect_def_type dt[1] = {vect_unknown_def_type};
5058 int ndts = 1;
ebfd146a 5059 int ncopies;
f18b55bd 5060 int i, j;
6e1aa848 5061 vec<tree> vec_oprnds = vNULL;
ebfd146a 5062 tree vop;
a70d6342 5063 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5064 vec_info *vinfo = stmt_info->vinfo;
355fe088 5065 gimple *new_stmt = NULL;
f18b55bd 5066 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
5067 enum tree_code code;
5068 tree vectype_in;
ebfd146a 5069
a70d6342 5070 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5071 return false;
5072
66c16fd9
RB
5073 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5074 && ! vec_stmt)
ebfd146a
IR
5075 return false;
5076
5077 /* Is vectorizable assignment? */
5078 if (!is_gimple_assign (stmt))
5079 return false;
5080
5081 scalar_dest = gimple_assign_lhs (stmt);
5082 if (TREE_CODE (scalar_dest) != SSA_NAME)
5083 return false;
5084
fde9c428 5085 code = gimple_assign_rhs_code (stmt);
ebfd146a 5086 if (gimple_assign_single_p (stmt)
fde9c428
RG
5087 || code == PAREN_EXPR
5088 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
5089 op = gimple_assign_rhs1 (stmt);
5090 else
5091 return false;
5092
7b7ec6c5
RG
5093 if (code == VIEW_CONVERT_EXPR)
5094 op = TREE_OPERAND (op, 0);
5095
465c8c19 5096 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928686b1 5097 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
5098
5099 /* Multiple types in SLP are handled by creating the appropriate number of
5100 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5101 case of SLP. */
fce57248 5102 if (slp_node)
465c8c19
JJ
5103 ncopies = 1;
5104 else
e8f142e2 5105 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
5106
5107 gcc_assert (ncopies >= 1);
5108
81c40241 5109 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 5110 {
73fbfcad 5111 if (dump_enabled_p ())
78c60e3d 5112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5113 "use not simple.\n");
ebfd146a
IR
5114 return false;
5115 }
5116
fde9c428
RG
5117 /* We can handle NOP_EXPR conversions that do not change the number
5118 of elements or the vector size. */
7b7ec6c5
RG
5119 if ((CONVERT_EXPR_CODE_P (code)
5120 || code == VIEW_CONVERT_EXPR)
fde9c428 5121 && (!vectype_in
928686b1 5122 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
cf098191
RS
5123 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5124 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
fde9c428
RG
5125 return false;
5126
7b7b1813
RG
5127 /* We do not handle bit-precision changes. */
5128 if ((CONVERT_EXPR_CODE_P (code)
5129 || code == VIEW_CONVERT_EXPR)
5130 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2be65d9e
RS
5131 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5132 || !type_has_mode_precision_p (TREE_TYPE (op)))
7b7b1813
RG
5133 /* But a conversion that does not change the bit-pattern is ok. */
5134 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5135 > TYPE_PRECISION (TREE_TYPE (op)))
2dab46d5
IE
5136 && TYPE_UNSIGNED (TREE_TYPE (op)))
5137 /* Conversion between boolean types of different sizes is
 5138 a simple assignment in case their vectypes are the same
5139 boolean vectors. */
5140 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5141 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 5142 {
73fbfcad 5143 if (dump_enabled_p ())
78c60e3d
SS
5144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5145 "type conversion to/from bit-precision "
e645e942 5146 "unsupported.\n");
7b7b1813
RG
5147 return false;
5148 }
5149
ebfd146a
IR
5150 if (!vec_stmt) /* transformation not required. */
5151 {
5152 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 5153 if (dump_enabled_p ())
78c60e3d 5154 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5155 "=== vectorizable_assignment ===\n");
78604de0
RB
5156 if (!slp_node)
5157 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5158 return true;
5159 }
5160
67b8dbac 5161 /* Transform. */
73fbfcad 5162 if (dump_enabled_p ())
e645e942 5163 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
ebfd146a
IR
5164
5165 /* Handle def. */
5166 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5167
5168 /* Handle use. */
f18b55bd 5169 for (j = 0; j < ncopies; j++)
ebfd146a 5170 {
f18b55bd
IR
5171 /* Handle uses. */
5172 if (j == 0)
306b0c92 5173 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
f18b55bd
IR
5174 else
5175 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5176
 5177 /* Arguments are ready.  Create the new vector stmt. */
9771b263 5178 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 5179 {
7b7ec6c5
RG
5180 if (CONVERT_EXPR_CODE_P (code)
5181 || code == VIEW_CONVERT_EXPR)
4a73490d 5182 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
5183 new_stmt = gimple_build_assign (vec_dest, vop);
5184 new_temp = make_ssa_name (vec_dest, new_stmt);
5185 gimple_assign_set_lhs (new_stmt, new_temp);
5186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5187 if (slp_node)
9771b263 5188 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 5189 }
ebfd146a
IR
5190
5191 if (slp_node)
f18b55bd
IR
5192 continue;
5193
5194 if (j == 0)
5195 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5196 else
5197 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5198
5199 prev_stmt_info = vinfo_for_stmt (new_stmt);
5200 }
b8698a0f 5201
9771b263 5202 vec_oprnds.release ();
ebfd146a
IR
5203 return true;
5204}
5205
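/* A minimal sketch (hypothetical name, not from this file): a copy that
   vectorizable_assignment turns into plain vector moves.  A sign-changing
   conversion of equal width goes through a VIEW_CONVERT_EXPR on the
   vector operand.  */

static void
example_assignment (unsigned int *restrict u, const int *restrict s, int n)
{
  for (int i = 0; i < n; i++)
    u[i] = (unsigned int) s[i];	/* Same width and subparts: a vector copy.  */
}
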
9dc3f7de 5206
1107f3ae
IR
5207/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5208 either as shift by a scalar or by a vector. */
5209
5210bool
5211vect_supportable_shift (enum tree_code code, tree scalar_type)
5212{
5213
ef4bddc2 5214 machine_mode vec_mode;
1107f3ae
IR
5215 optab optab;
5216 int icode;
5217 tree vectype;
5218
5219 vectype = get_vectype_for_scalar_type (scalar_type);
5220 if (!vectype)
5221 return false;
5222
5223 optab = optab_for_tree_code (code, vectype, optab_scalar);
5224 if (!optab
5225 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5226 {
5227 optab = optab_for_tree_code (code, vectype, optab_vector);
5228 if (!optab
5229 || (optab_handler (optab, TYPE_MODE (vectype))
5230 == CODE_FOR_nothing))
5231 return false;
5232 }
5233
5234 vec_mode = TYPE_MODE (vectype);
5235 icode = (int) optab_handler (optab, vec_mode);
5236 if (icode == CODE_FOR_nothing)
5237 return false;
5238
5239 return true;
5240}
5241
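/* A caller-side sketch (hypothetical, not from this file): pattern
   recognizers can use the predicate above to check that a shift they are
   about to synthesize would vectorize at all.  */

static bool
example_shift_supported_p (void)
{
  /* Left shift of 32-bit ints, by either a scalar or a vector amount.  */
  return vect_supportable_shift (LSHIFT_EXPR, integer_type_node);
}
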
5242
9dc3f7de
IR
5243/* Function vectorizable_shift.
5244
5245 Check if STMT performs a shift operation that can be vectorized.
5246 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5247 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5248 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5249
5250static bool
355fe088
TS
5251vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5252 gimple **vec_stmt, slp_tree slp_node)
9dc3f7de
IR
5253{
5254 tree vec_dest;
5255 tree scalar_dest;
5256 tree op0, op1 = NULL;
5257 tree vec_oprnd1 = NULL_TREE;
5258 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5259 tree vectype;
5260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5261 enum tree_code code;
ef4bddc2 5262 machine_mode vec_mode;
9dc3f7de
IR
5263 tree new_temp;
5264 optab optab;
5265 int icode;
ef4bddc2 5266 machine_mode optab_op2_mode;
355fe088 5267 gimple *def_stmt;
9dc3f7de 5268 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5269 int ndts = 2;
355fe088 5270 gimple *new_stmt = NULL;
9dc3f7de 5271 stmt_vec_info prev_stmt_info;
928686b1
RS
5272 poly_uint64 nunits_in;
5273 poly_uint64 nunits_out;
9dc3f7de 5274 tree vectype_out;
cede2577 5275 tree op1_vectype;
9dc3f7de
IR
5276 int ncopies;
5277 int j, i;
6e1aa848
DN
5278 vec<tree> vec_oprnds0 = vNULL;
5279 vec<tree> vec_oprnds1 = vNULL;
9dc3f7de
IR
5280 tree vop0, vop1;
5281 unsigned int k;
49eab32e 5282 bool scalar_shift_arg = true;
9dc3f7de 5283 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5284 vec_info *vinfo = stmt_info->vinfo;
9dc3f7de
IR
5285
5286 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5287 return false;
5288
66c16fd9
RB
5289 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5290 && ! vec_stmt)
9dc3f7de
IR
5291 return false;
5292
5293 /* Is STMT a vectorizable binary/unary operation? */
5294 if (!is_gimple_assign (stmt))
5295 return false;
5296
5297 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5298 return false;
5299
5300 code = gimple_assign_rhs_code (stmt);
5301
5302 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5303 || code == RROTATE_EXPR))
5304 return false;
5305
5306 scalar_dest = gimple_assign_lhs (stmt);
5307 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2be65d9e 5308 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
7b7b1813 5309 {
73fbfcad 5310 if (dump_enabled_p ())
78c60e3d 5311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5312 "bit-precision shifts not supported.\n");
7b7b1813
RG
5313 return false;
5314 }
9dc3f7de
IR
5315
5316 op0 = gimple_assign_rhs1 (stmt);
81c40241 5317 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 5318 {
73fbfcad 5319 if (dump_enabled_p ())
78c60e3d 5320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5321 "use not simple.\n");
9dc3f7de
IR
5322 return false;
5323 }
 5324 /* If op0 is an external or constant def, use a vector type with
5325 the same size as the output vector type. */
5326 if (!vectype)
5327 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5328 if (vec_stmt)
5329 gcc_assert (vectype);
5330 if (!vectype)
5331 {
73fbfcad 5332 if (dump_enabled_p ())
78c60e3d 5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5334 "no vectype for scalar type\n");
9dc3f7de
IR
5335 return false;
5336 }
5337
5338 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5339 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5340 if (maybe_ne (nunits_out, nunits_in))
9dc3f7de
IR
5341 return false;
5342
5343 op1 = gimple_assign_rhs2 (stmt);
81c40241 5344 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 5345 {
73fbfcad 5346 if (dump_enabled_p ())
78c60e3d 5347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5348 "use not simple.\n");
9dc3f7de
IR
5349 return false;
5350 }
5351
9dc3f7de
IR
5352 /* Multiple types in SLP are handled by creating the appropriate number of
5353 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5354 case of SLP. */
fce57248 5355 if (slp_node)
9dc3f7de
IR
5356 ncopies = 1;
5357 else
e8f142e2 5358 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9dc3f7de
IR
5359
5360 gcc_assert (ncopies >= 1);
5361
 5362 /* Determine whether the shift amount is a vector or a scalar.  If the
5363 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5364
dbfa87aa
YR
5365 if ((dt[1] == vect_internal_def
5366 || dt[1] == vect_induction_def)
5367 && !slp_node)
49eab32e
JJ
5368 scalar_shift_arg = false;
5369 else if (dt[1] == vect_constant_def
5370 || dt[1] == vect_external_def
5371 || dt[1] == vect_internal_def)
5372 {
5373 /* In SLP, need to check whether the shift count is the same,
5374 in loops if it is a constant or invariant, it is always
5375 a scalar shift. */
5376 if (slp_node)
5377 {
355fe088
TS
5378 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5379 gimple *slpstmt;
49eab32e 5380
9771b263 5381 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
49eab32e
JJ
5382 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5383 scalar_shift_arg = false;
5384 }
60d393e8
RB
5385
5386 /* If the shift amount is computed by a pattern stmt we cannot
5387 use the scalar amount directly thus give up and use a vector
5388 shift. */
5389 if (dt[1] == vect_internal_def)
5390 {
5391 gimple *def = SSA_NAME_DEF_STMT (op1);
5392 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5393 scalar_shift_arg = false;
5394 }
49eab32e
JJ
5395 }
5396 else
5397 {
73fbfcad 5398 if (dump_enabled_p ())
78c60e3d 5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5400 "operand mode requires invariant argument.\n");
49eab32e
JJ
5401 return false;
5402 }
5403
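  /* In short: a "scalar shift argument" shifts every lane by the same
     invariant amount (a[i] << 3) and maps to a vector-by-scalar insn,
     while a per-lane amount (a[i] << b[i]) needs the vector/vector
     optab probed below.  */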
9dc3f7de 5404 /* Vector shifted by vector. */
49eab32e 5405 if (!scalar_shift_arg)
9dc3f7de
IR
5406 {
5407 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 5408 if (dump_enabled_p ())
78c60e3d 5409 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5410 "vector/vector shift/rotate found.\n");
78c60e3d 5411
aa948027
JJ
5412 if (!op1_vectype)
5413 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5414 if (op1_vectype == NULL_TREE
5415 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 5416 {
73fbfcad 5417 if (dump_enabled_p ())
78c60e3d
SS
5418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5419 "unusable type for last operand in"
e645e942 5420 " vector/vector shift/rotate.\n");
cede2577
JJ
5421 return false;
5422 }
9dc3f7de
IR
5423 }
5424 /* See if the machine has a vector shifted by scalar insn and if not
5425 then see if it has a vector shifted by vector insn. */
49eab32e 5426 else
9dc3f7de
IR
5427 {
5428 optab = optab_for_tree_code (code, vectype, optab_scalar);
5429 if (optab
5430 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5431 {
73fbfcad 5432 if (dump_enabled_p ())
78c60e3d 5433 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5434 "vector/scalar shift/rotate found.\n");
9dc3f7de
IR
5435 }
5436 else
5437 {
5438 optab = optab_for_tree_code (code, vectype, optab_vector);
5439 if (optab
5440 && (optab_handler (optab, TYPE_MODE (vectype))
5441 != CODE_FOR_nothing))
5442 {
49eab32e
JJ
5443 scalar_shift_arg = false;
5444
73fbfcad 5445 if (dump_enabled_p ())
78c60e3d 5446 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5447 "vector/vector shift/rotate found.\n");
9dc3f7de
IR
5448
5449 /* Unlike the other binary operators, shifts/rotates have
5450 the rhs being int, instead of the same type as the lhs,
5451 so make sure the scalar is the right type if we are
aa948027 5452 dealing with vectors of long long/long/short/char. */
9dc3f7de
IR
5453 if (dt[1] == vect_constant_def)
5454 op1 = fold_convert (TREE_TYPE (vectype), op1);
aa948027
JJ
5455 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5456 TREE_TYPE (op1)))
5457 {
5458 if (slp_node
5459 && TYPE_MODE (TREE_TYPE (vectype))
5460 != TYPE_MODE (TREE_TYPE (op1)))
5461 {
73fbfcad 5462 if (dump_enabled_p ())
78c60e3d
SS
5463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5464 "unusable type for last operand in"
e645e942 5465 " vector/vector shift/rotate.\n");
21c0a521 5466 return false;
aa948027
JJ
5467 }
5468 if (vec_stmt && !slp_node)
5469 {
5470 op1 = fold_convert (TREE_TYPE (vectype), op1);
5471 op1 = vect_init_vector (stmt, op1,
5472 TREE_TYPE (vectype), NULL);
5473 }
5474 }
9dc3f7de
IR
5475 }
5476 }
5477 }
9dc3f7de
IR
5478
5479 /* Supportable by target? */
5480 if (!optab)
5481 {
73fbfcad 5482 if (dump_enabled_p ())
78c60e3d 5483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5484 "no optab.\n");
9dc3f7de
IR
5485 return false;
5486 }
5487 vec_mode = TYPE_MODE (vectype);
5488 icode = (int) optab_handler (optab, vec_mode);
5489 if (icode == CODE_FOR_nothing)
5490 {
73fbfcad 5491 if (dump_enabled_p ())
78c60e3d 5492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5493 "op not supported by target.\n");
9dc3f7de 5494 /* Check only during analysis. */
cf098191 5495 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb
RS
5496 || (!vec_stmt
5497 && !vect_worthwhile_without_simd_p (vinfo, code)))
9dc3f7de 5498 return false;
73fbfcad 5499 if (dump_enabled_p ())
e645e942
TJ
5500 dump_printf_loc (MSG_NOTE, vect_location,
5501 "proceeding using word mode.\n");
9dc3f7de
IR
5502 }
5503
5504 /* Worthwhile without SIMD support? Check only during analysis. */
ca09abcb
RS
5505 if (!vec_stmt
5506 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5507 && !vect_worthwhile_without_simd_p (vinfo, code))
9dc3f7de 5508 {
73fbfcad 5509 if (dump_enabled_p ())
78c60e3d 5510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5511 "not worthwhile without SIMD support.\n");
9dc3f7de
IR
5512 return false;
5513 }
5514
5515 if (!vec_stmt) /* transformation not required. */
5516 {
5517 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 5518 if (dump_enabled_p ())
e645e942
TJ
5519 dump_printf_loc (MSG_NOTE, vect_location,
5520 "=== vectorizable_shift ===\n");
78604de0
RB
5521 if (!slp_node)
5522 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
9dc3f7de
IR
5523 return true;
5524 }
5525
67b8dbac 5526 /* Transform. */
9dc3f7de 5527
73fbfcad 5528 if (dump_enabled_p ())
78c60e3d 5529 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5530 "transform binary/unary operation.\n");
9dc3f7de
IR
5531
5532 /* Handle def. */
5533 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5534
9dc3f7de
IR
5535 prev_stmt_info = NULL;
5536 for (j = 0; j < ncopies; j++)
5537 {
5538 /* Handle uses. */
5539 if (j == 0)
5540 {
5541 if (scalar_shift_arg)
5542 {
5543 /* Vector shl and shr insn patterns can be defined with scalar
5544 operand 2 (shift operand). In this case, use constant or loop
5545 invariant op1 directly, without extending it to vector mode
5546 first. */
5547 optab_op2_mode = insn_data[icode].operand[2].mode;
5548 if (!VECTOR_MODE_P (optab_op2_mode))
5549 {
73fbfcad 5550 if (dump_enabled_p ())
78c60e3d 5551 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5552 "operand 1 using scalar mode.\n");
9dc3f7de 5553 vec_oprnd1 = op1;
8930f723 5554 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 5555 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5556 if (slp_node)
5557 {
5558 /* Store vec_oprnd1 for every vector stmt to be created
5559 for SLP_NODE. We check during the analysis that all
5560 the shift arguments are the same.
5561 TODO: Allow different constants for different vector
5562 stmts generated for an SLP instance. */
5563 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 5564 vec_oprnds1.quick_push (vec_oprnd1);
9dc3f7de
IR
5565 }
5566 }
5567 }
5568
 5569 /* vec_oprnd1 is available if operand 1 should be of a scalar type
 5570 (a special case for certain kinds of vector shifts); otherwise,
5571 operand 1 should be of a vector type (the usual case). */
5572 if (vec_oprnd1)
5573 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5574 slp_node);
9dc3f7de
IR
5575 else
5576 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5577 slp_node);
9dc3f7de
IR
5578 }
5579 else
5580 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5581
5582 /* Arguments are ready. Create the new vector stmt. */
9771b263 5583 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 5584 {
9771b263 5585 vop1 = vec_oprnds1[i];
0d0e4a03 5586 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
9dc3f7de
IR
5587 new_temp = make_ssa_name (vec_dest, new_stmt);
5588 gimple_assign_set_lhs (new_stmt, new_temp);
5589 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5590 if (slp_node)
9771b263 5591 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9dc3f7de
IR
5592 }
5593
5594 if (slp_node)
5595 continue;
5596
5597 if (j == 0)
5598 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5599 else
5600 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5601 prev_stmt_info = vinfo_for_stmt (new_stmt);
5602 }
5603
9771b263
DN
5604 vec_oprnds0.release ();
5605 vec_oprnds1.release ();
9dc3f7de
IR
5606
5607 return true;
5608}
5609
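/* A minimal sketch (hypothetical, not from this file): the two
   shift-amount shapes vectorizable_shift distinguishes.  */

static void
example_shifts (int *restrict a, const int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= 2;		/* Invariant amount: vector/scalar optab.  */
  for (int i = 0; i < n; i++)
    a[i] >>= b[i];	/* Per-element amount: vector/vector optab.  */
}
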
5610
ebfd146a
IR
5611/* Function vectorizable_operation.
5612
16949072
RG
5613 Check if STMT performs a binary, unary or ternary operation that can
5614 be vectorized.
b8698a0f 5615 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5616 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5617 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5618
5619static bool
355fe088
TS
5620vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5621 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 5622{
00f07b86 5623 tree vec_dest;
ebfd146a 5624 tree scalar_dest;
16949072 5625 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 5626 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 5627 tree vectype;
ebfd146a 5628 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
0eb952ea 5629 enum tree_code code, orig_code;
ef4bddc2 5630 machine_mode vec_mode;
ebfd146a
IR
5631 tree new_temp;
5632 int op_type;
00f07b86 5633 optab optab;
523ba738 5634 bool target_support_p;
355fe088 5635 gimple *def_stmt;
16949072
RG
5636 enum vect_def_type dt[3]
5637 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 5638 int ndts = 3;
355fe088 5639 gimple *new_stmt = NULL;
ebfd146a 5640 stmt_vec_info prev_stmt_info;
928686b1
RS
5641 poly_uint64 nunits_in;
5642 poly_uint64 nunits_out;
ebfd146a
IR
5643 tree vectype_out;
5644 int ncopies;
5645 int j, i;
6e1aa848
DN
5646 vec<tree> vec_oprnds0 = vNULL;
5647 vec<tree> vec_oprnds1 = vNULL;
5648 vec<tree> vec_oprnds2 = vNULL;
16949072 5649 tree vop0, vop1, vop2;
a70d6342 5650 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5651 vec_info *vinfo = stmt_info->vinfo;
a70d6342 5652
a70d6342 5653 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5654 return false;
5655
66c16fd9
RB
5656 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5657 && ! vec_stmt)
ebfd146a
IR
5658 return false;
5659
5660 /* Is STMT a vectorizable binary/unary operation? */
5661 if (!is_gimple_assign (stmt))
5662 return false;
5663
5664 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5665 return false;
5666
0eb952ea 5667 orig_code = code = gimple_assign_rhs_code (stmt);
ebfd146a 5668
1af4ebf5
MG
5669 /* For pointer addition and subtraction, we should use the normal
5670 plus and minus for the vector operation. */
ebfd146a
IR
5671 if (code == POINTER_PLUS_EXPR)
5672 code = PLUS_EXPR;
1af4ebf5
MG
5673 if (code == POINTER_DIFF_EXPR)
5674 code = MINUS_EXPR;
ebfd146a
IR
5675
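  /* For example, "p + n" on a pointer arrives as POINTER_PLUS_EXPR and
     "p - q" as POINTER_DIFF_EXPR; element-wise, the vector operation is
     the ordinary PLUS_EXPR / MINUS_EXPR chosen above.  */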
 5676 /* Support only unary, binary or ternary operations. */
5677 op_type = TREE_CODE_LENGTH (code);
16949072 5678 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 5679 {
73fbfcad 5680 if (dump_enabled_p ())
78c60e3d 5681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5682 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 5683 op_type);
ebfd146a
IR
5684 return false;
5685 }
5686
b690cc0f
RG
5687 scalar_dest = gimple_assign_lhs (stmt);
5688 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5689
7b7b1813
RG
5690 /* Most operations cannot handle bit-precision types without extra
5691 truncations. */
045c1278 5692 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
2be65d9e 5693 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
7b7b1813
RG
5694 /* Exception are bitwise binary operations. */
5695 && code != BIT_IOR_EXPR
5696 && code != BIT_XOR_EXPR
5697 && code != BIT_AND_EXPR)
5698 {
73fbfcad 5699 if (dump_enabled_p ())
78c60e3d 5700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5701 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
5702 return false;
5703 }
5704
ebfd146a 5705 op0 = gimple_assign_rhs1 (stmt);
81c40241 5706 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 5707 {
73fbfcad 5708 if (dump_enabled_p ())
78c60e3d 5709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5710 "use not simple.\n");
ebfd146a
IR
5711 return false;
5712 }
b690cc0f
RG
 5713 /* If op0 is an external or constant def, use a vector type with
5714 the same size as the output vector type. */
5715 if (!vectype)
b036c6c5
IE
5716 {
5717 /* For boolean type we cannot determine vectype by
5718 invariant value (don't know whether it is a vector
5719 of booleans or vector of integers). We use output
5720 vectype because operations on boolean don't change
5721 type. */
2568d8a1 5722 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
b036c6c5 5723 {
2568d8a1 5724 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
b036c6c5
IE
5725 {
5726 if (dump_enabled_p ())
5727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5728 "not supported operation on bool value.\n");
5729 return false;
5730 }
5731 vectype = vectype_out;
5732 }
5733 else
5734 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5735 }
7d8930a0
IR
5736 if (vec_stmt)
5737 gcc_assert (vectype);
5738 if (!vectype)
5739 {
73fbfcad 5740 if (dump_enabled_p ())
7d8930a0 5741 {
78c60e3d
SS
5742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5743 "no vectype for scalar type ");
5744 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5745 TREE_TYPE (op0));
e645e942 5746 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
5747 }
5748
5749 return false;
5750 }
b690cc0f
RG
5751
5752 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5753 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
928686b1 5754 if (maybe_ne (nunits_out, nunits_in))
b690cc0f 5755 return false;
ebfd146a 5756
16949072 5757 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
5758 {
5759 op1 = gimple_assign_rhs2 (stmt);
81c40241 5760 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 5761 {
73fbfcad 5762 if (dump_enabled_p ())
78c60e3d 5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5764 "use not simple.\n");
ebfd146a
IR
5765 return false;
5766 }
5767 }
16949072
RG
5768 if (op_type == ternary_op)
5769 {
5770 op2 = gimple_assign_rhs3 (stmt);
81c40241 5771 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 5772 {
73fbfcad 5773 if (dump_enabled_p ())
78c60e3d 5774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5775 "use not simple.\n");
16949072
RG
5776 return false;
5777 }
5778 }
ebfd146a 5779
b690cc0f 5780 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 5781 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 5782 case of SLP. */
fce57248 5783 if (slp_node)
b690cc0f
RG
5784 ncopies = 1;
5785 else
e8f142e2 5786 ncopies = vect_get_num_copies (loop_vinfo, vectype);
b690cc0f
RG
5787
5788 gcc_assert (ncopies >= 1);
5789
9dc3f7de 5790 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
5791 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5792 || code == RROTATE_EXPR)
9dc3f7de 5793 return false;
ebfd146a 5794
ebfd146a 5795 /* Supportable by target? */
00f07b86
RH
5796
5797 vec_mode = TYPE_MODE (vectype);
5798 if (code == MULT_HIGHPART_EXPR)
523ba738 5799 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
5800 else
5801 {
5802 optab = optab_for_tree_code (code, vectype, optab_default);
5803 if (!optab)
5deb57cb 5804 {
73fbfcad 5805 if (dump_enabled_p ())
78c60e3d 5806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5807 "no optab.\n");
00f07b86 5808 return false;
5deb57cb 5809 }
523ba738
RS
5810 target_support_p = (optab_handler (optab, vec_mode)
5811 != CODE_FOR_nothing);
5deb57cb
JJ
5812 }
5813
523ba738 5814 if (!target_support_p)
ebfd146a 5815 {
73fbfcad 5816 if (dump_enabled_p ())
78c60e3d 5817 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5818 "op not supported by target.\n");
ebfd146a 5819 /* Check only during analysis. */
cf098191 5820 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
ca09abcb 5821 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
ebfd146a 5822 return false;
73fbfcad 5823 if (dump_enabled_p ())
e645e942
TJ
5824 dump_printf_loc (MSG_NOTE, vect_location,
5825 "proceeding using word mode.\n");
383d9c83
IR
5826 }
5827
4a00c761 5828 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
5829 if (!VECTOR_MODE_P (vec_mode)
5830 && !vec_stmt
ca09abcb 5831 && !vect_worthwhile_without_simd_p (vinfo, code))
7d8930a0 5832 {
73fbfcad 5833 if (dump_enabled_p ())
78c60e3d 5834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5835 "not worthwhile without SIMD support.\n");
e34842c6 5836 return false;
7d8930a0 5837 }
ebfd146a 5838
ebfd146a
IR
5839 if (!vec_stmt) /* transformation not required. */
5840 {
4a00c761 5841 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5842 if (dump_enabled_p ())
78c60e3d 5843 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5844 "=== vectorizable_operation ===\n");
78604de0
RB
5845 if (!slp_node)
5846 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
ebfd146a
IR
5847 return true;
5848 }
5849
67b8dbac 5850 /* Transform. */
ebfd146a 5851
73fbfcad 5852 if (dump_enabled_p ())
78c60e3d 5853 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5854 "transform binary/unary operation.\n");
383d9c83 5855
ebfd146a 5856 /* Handle def. */
00f07b86 5857 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5858
0eb952ea
JJ
5859 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5860 vectors with unsigned elements, but the result is signed. So, we
 5861 need to compute the MINUS_EXPR into a vectype temporary and
5862 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5863 tree vec_cvt_dest = NULL_TREE;
5864 if (orig_code == POINTER_DIFF_EXPR)
5865 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5866
ebfd146a
IR
5867 /* In case the vectorization factor (VF) is bigger than the number
5868 of elements that we can fit in a vectype (nunits), we have to generate
 5869 more than one vector stmt, i.e., we need to "unroll" the
4a00c761
JJ
5870 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5871 from one copy of the vector stmt to the next, in the field
5872 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5873 stages to find the correct vector defs to be used when vectorizing
5874 stmts that use the defs of the current stmt. The example below
5875 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5876 we need to create 4 vectorized stmts):
5877
5878 before vectorization:
5879 RELATED_STMT VEC_STMT
5880 S1: x = memref - -
5881 S2: z = x + 1 - -
5882
5883 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5884 there):
5885 RELATED_STMT VEC_STMT
5886 VS1_0: vx0 = memref0 VS1_1 -
5887 VS1_1: vx1 = memref1 VS1_2 -
5888 VS1_2: vx2 = memref2 VS1_3 -
5889 VS1_3: vx3 = memref3 - -
5890 S1: x = load - VS1_0
5891 S2: z = x + 1 - -
5892
5893 step2: vectorize stmt S2 (done here):
5894 To vectorize stmt S2 we first need to find the relevant vector
5895 def for the first operand 'x'. This is, as usual, obtained from
5896 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5897 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5898 relevant vector def 'vx0'. Having found 'vx0' we can generate
5899 the vector stmt VS2_0, and as usual, record it in the
5900 STMT_VINFO_VEC_STMT of stmt S2.
5901 When creating the second copy (VS2_1), we obtain the relevant vector
5902 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5903 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5904 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5905 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5906 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5907 chain of stmts and pointers:
5908 RELATED_STMT VEC_STMT
5909 VS1_0: vx0 = memref0 VS1_1 -
5910 VS1_1: vx1 = memref1 VS1_2 -
5911 VS1_2: vx2 = memref2 VS1_3 -
5912 VS1_3: vx3 = memref3 - -
5913 S1: x = load - VS1_0
5914 VS2_0: vz0 = vx0 + v1 VS2_1 -
5915 VS2_1: vz1 = vx1 + v1 VS2_2 -
5916 VS2_2: vz2 = vx2 + v1 VS2_3 -
5917 VS2_3: vz3 = vx3 + v1 - -
5918 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5919
5920 prev_stmt_info = NULL;
5921 for (j = 0; j < ncopies; j++)
5922 {
5923 /* Handle uses. */
5924 if (j == 0)
4a00c761
JJ
5925 {
5926 if (op_type == binary_op || op_type == ternary_op)
5927 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
306b0c92 5928 slp_node);
4a00c761
JJ
5929 else
5930 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
306b0c92 5931 slp_node);
4a00c761 5932 if (op_type == ternary_op)
c392943c 5933 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
306b0c92 5934 slp_node);
4a00c761 5935 }
ebfd146a 5936 else
4a00c761
JJ
5937 {
5938 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5939 if (op_type == ternary_op)
5940 {
9771b263
DN
5941 tree vec_oprnd = vec_oprnds2.pop ();
5942 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5943 vec_oprnd));
4a00c761
JJ
5944 }
5945 }
5946
5947 /* Arguments are ready. Create the new vector stmt. */
9771b263 5948 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5949 {
4a00c761 5950 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5951 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5952 vop2 = ((op_type == ternary_op)
9771b263 5953 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5954 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5955 new_temp = make_ssa_name (vec_dest, new_stmt);
5956 gimple_assign_set_lhs (new_stmt, new_temp);
5957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
0eb952ea
JJ
5958 if (vec_cvt_dest)
5959 {
5960 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5961 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5962 new_temp);
5963 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5964 gimple_assign_set_lhs (new_stmt, new_temp);
5965 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5966 }
4a00c761 5967 if (slp_node)
9771b263 5968 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5969 }
5970
4a00c761
JJ
5971 if (slp_node)
5972 continue;
5973
5974 if (j == 0)
5975 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5976 else
5977 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5978 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5979 }
5980
9771b263
DN
5981 vec_oprnds0.release ();
5982 vec_oprnds1.release ();
5983 vec_oprnds2.release ();
ebfd146a 5984
ebfd146a
IR
5985 return true;
5986}
5987
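/* A minimal sketch (hypothetical, not from this file): unary, binary and
   ternary statement shapes accepted by vectorizable_operation.  The
   ternary line assumes the multiply-add has already been combined into a
   single ternary stmt by earlier passes.  */

static void
example_operations (float *restrict d, const float *restrict a,
		    const float *restrict b, const float *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    d[i] = -a[i];		/* unary_op (NEGATE_EXPR).  */
  for (int i = 0; i < n; i++)
    d[i] = a[i] + b[i];		/* binary_op (PLUS_EXPR).  */
  for (int i = 0; i < n; i++)
    d[i] = a[i] * b[i] + c[i];	/* ternary_op if combined into one stmt.  */
}
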
f702e7d4 5988/* A helper function to ensure data reference DR's base alignment. */
c716e67f
XDL
5989
5990static void
f702e7d4 5991ensure_base_align (struct data_reference *dr)
c716e67f
XDL
5992{
5993 if (!dr->aux)
5994 return;
5995
52639a61 5996 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f 5997 {
52639a61 5998 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5999
f702e7d4
RS
6000 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6001
428f0c67 6002 if (decl_in_symtab_p (base_decl))
f702e7d4 6003 symtab_node::get (base_decl)->increase_alignment (align_base_to);
428f0c67
JH
6004 else
6005 {
f702e7d4 6006 SET_DECL_ALIGN (base_decl, align_base_to);
428f0c67
JH
6007 DECL_USER_ALIGN (base_decl) = 1;
6008 }
52639a61 6009 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
6010 }
6011}
6012
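/* A sketch of the source-level effect (hypothetical decl, not from this
   file): when DR's base is a misaligned decl and the target prefers,
   say, 32-byte alignment for vector accesses, ensure_base_align acts as
   if the base had been declared:  */

static double example_base[1024] __attribute__ ((aligned (32)));
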
ebfd146a 6013
44fc7854
BE
6014/* Function get_group_alias_ptr_type.
6015
6016 Return the alias type for the group starting at FIRST_STMT. */
6017
6018static tree
6019get_group_alias_ptr_type (gimple *first_stmt)
6020{
6021 struct data_reference *first_dr, *next_dr;
6022 gimple *next_stmt;
6023
6024 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6025 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6026 while (next_stmt)
6027 {
6028 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6029 if (get_alias_set (DR_REF (first_dr))
6030 != get_alias_set (DR_REF (next_dr)))
6031 {
6032 if (dump_enabled_p ())
6033 dump_printf_loc (MSG_NOTE, vect_location,
6034 "conflicting alias set types.\n");
6035 return ptr_type_node;
6036 }
6037 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6038 }
6039 return reference_alias_ptr_type (DR_REF (first_dr));
6040}
6041
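/* A minimal sketch (hypothetical types, not from this file): a grouped
   store whose members disagree on alias sets, which makes
   get_group_alias_ptr_type fall back to ptr_type_node.  */

struct example_pair { int i; float f; };

static void
example_group_store (struct example_pair *restrict p, int n)
{
  for (int i = 0; i < n; i++)
    {
      p[i].i = 0;	/* Alias set of int ...  */
      p[i].f = 0.0f;	/* ... differs from the alias set of float.  */
    }
}
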
6042
ebfd146a
IR
6043/* Function vectorizable_store.
6044
b8698a0f
L
 6045 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6046 can be vectorized.
6047 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6048 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6049 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6050
6051static bool
355fe088 6052vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6053 slp_tree slp_node)
ebfd146a 6054{
ebfd146a
IR
6055 tree data_ref;
6056 tree op;
6057 tree vec_oprnd = NULL_TREE;
6058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6059 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6060 tree elem_type;
ebfd146a 6061 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6062 struct loop *loop = NULL;
ef4bddc2 6063 machine_mode vec_mode;
ebfd146a
IR
6064 tree dummy;
6065 enum dr_alignment_support alignment_support_scheme;
355fe088 6066 gimple *def_stmt;
929b4411
RS
6067 enum vect_def_type rhs_dt = vect_unknown_def_type;
6068 enum vect_def_type mask_dt = vect_unknown_def_type;
ebfd146a
IR
6069 stmt_vec_info prev_stmt_info = NULL;
6070 tree dataref_ptr = NULL_TREE;
74bf76ed 6071 tree dataref_offset = NULL_TREE;
355fe088 6072 gimple *ptr_incr = NULL;
ebfd146a
IR
6073 int ncopies;
6074 int j;
2de001ee
RS
6075 gimple *next_stmt, *first_stmt;
6076 bool grouped_store;
ebfd146a 6077 unsigned int group_size, i;
6e1aa848
DN
6078 vec<tree> oprnds = vNULL;
6079 vec<tree> result_chain = vNULL;
ebfd146a 6080 bool inv_p;
09dfa495 6081 tree offset = NULL_TREE;
6e1aa848 6082 vec<tree> vec_oprnds = vNULL;
ebfd146a 6083 bool slp = (slp_node != NULL);
ebfd146a 6084 unsigned int vec_num;
a70d6342 6085 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 6086 vec_info *vinfo = stmt_info->vinfo;
272c6793 6087 tree aggr_type;
134c85ca 6088 gather_scatter_info gs_info;
355fe088 6089 gimple *new_stmt;
d9f21f6a 6090 poly_uint64 vf;
2de001ee 6091 vec_load_store_type vls_type;
44fc7854 6092 tree ref_type;
a70d6342 6093
a70d6342 6094 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
6095 return false;
6096
66c16fd9
RB
6097 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6098 && ! vec_stmt)
ebfd146a
IR
6099 return false;
6100
6101 /* Is vectorizable store? */
6102
c3a8f964
RS
6103 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6104 if (is_gimple_assign (stmt))
6105 {
6106 tree scalar_dest = gimple_assign_lhs (stmt);
6107 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6108 && is_pattern_stmt_p (stmt_info))
6109 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6110 if (TREE_CODE (scalar_dest) != ARRAY_REF
6111 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6112 && TREE_CODE (scalar_dest) != INDIRECT_REF
6113 && TREE_CODE (scalar_dest) != COMPONENT_REF
6114 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6115 && TREE_CODE (scalar_dest) != REALPART_EXPR
6116 && TREE_CODE (scalar_dest) != MEM_REF)
6117 return false;
6118 }
6119 else
6120 {
6121 gcall *call = dyn_cast <gcall *> (stmt);
f307441a
RS
6122 if (!call || !gimple_call_internal_p (call))
6123 return false;
6124
6125 internal_fn ifn = gimple_call_internal_fn (call);
6126 if (!internal_store_fn_p (ifn))
c3a8f964 6127 return false;
ebfd146a 6128
c3a8f964
RS
6129 if (slp_node != NULL)
6130 {
6131 if (dump_enabled_p ())
6132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6133 "SLP of masked stores not supported.\n");
6134 return false;
6135 }
6136
f307441a
RS
6137 int mask_index = internal_fn_mask_index (ifn);
6138 if (mask_index >= 0)
6139 {
6140 mask = gimple_call_arg (call, mask_index);
929b4411
RS
6141 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6142 &mask_vectype))
f307441a
RS
6143 return false;
6144 }
c3a8f964
RS
6145 }
6146
6147 op = vect_get_store_rhs (stmt);
ebfd146a 6148
fce57248
RS
6149 /* Cannot have hybrid store SLP -- that would mean storing to the
6150 same location twice. */
6151 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6152
f4d09712 6153 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
4d694b27 6154 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
465c8c19
JJ
6155
6156 if (loop_vinfo)
b17dc4d4
RB
6157 {
6158 loop = LOOP_VINFO_LOOP (loop_vinfo);
6159 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6160 }
6161 else
6162 vf = 1;
465c8c19
JJ
6163
6164 /* Multiple types in SLP are handled by creating the appropriate number of
6165 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6166 case of SLP. */
fce57248 6167 if (slp)
465c8c19
JJ
6168 ncopies = 1;
6169 else
e8f142e2 6170 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
6171
6172 gcc_assert (ncopies >= 1);
6173
6174 /* FORNOW. This restriction should be relaxed. */
6175 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6176 {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }

  if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
    return false;

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
			    &memory_access_type, &gs_info))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
	{
	  if (!VECTOR_MODE_P (vec_mode)
	      || !can_vec_mask_load_store_p (vec_mode,
					     TYPE_MODE (mask_vectype), false))
	    return false;
	}
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
	       && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported access type for masked store.\n");
	  return false;
	}
    }
  else
    {
      /* FORNOW. In some cases can vectorize even if data-type not supported
	 (e.g. - array initialization with 0).  */
      if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
	return false;
    }

  grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && memory_access_type != VMAT_GATHER_SCATTER
		   && (slp || memory_access_type != VMAT_CONTIGUOUS));
  if (grouped_store)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;

      if (loop_vinfo
	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
				  memory_access_type, &gs_info);

      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!slp_node)
	vect_model_store_cost (stmt_info, ncopies, memory_access_type,
			       vls_type, NULL, NULL, NULL);
      return true;
    }
  gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  /* Transform.  */

  ensure_base_align (dr);

  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
    {
      tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      poly_uint64 scatter_off_nunits
	= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);

      if (known_eq (nunits, scatter_off_nunits))
	modifier = NONE;
      else if (known_eq (nunits * 2, scatter_off_nunits))
	{
	  modifier = WIDEN;

	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
	  unsigned int count = scatter_off_nunits.to_constant ();
	  vec_perm_builder sel (count, count, 1);
	  for (i = 0; i < (unsigned int) count; ++i)
	    sel.quick_push (i | (count / 2));

	  vec_perm_indices indices (sel, 1, count);
	  perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
						  indices);
	  gcc_assert (perm_mask != NULL_TREE);
	}
      else if (known_eq (nunits, scatter_off_nunits * 2))
	{
	  modifier = NARROW;

	  /* Currently gathers and scatters are only supported for
	     fixed-length vectors.  */
	  unsigned int count = nunits.to_constant ();
	  vec_perm_builder sel (count, count, 1);
	  for (i = 0; i < (unsigned int) count; ++i)
	    sel.quick_push (i | (count / 2));

	  vec_perm_indices indices (sel, 2, count);
	  perm_mask = vect_gen_perm_mask_checked (vectype, indices);
	  gcc_assert (perm_mask != NULL_TREE);
	  ncopies *= 2;
	}
      else
	gcc_unreachable ();

      rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);

      gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
			   && TREE_CODE (rettype) == VOID_TYPE);

      ptr = fold_convert (ptrtype, gs_info.base);
      if (!is_gimple_min_invariant (ptr))
	{
	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
	  gcc_assert (!new_bb);
	}

      /* Currently we support only unconditional scatter stores,
	 so mask should be all ones.  */
      mask = build_int_cst (masktype, -1);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gs_info.scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
	{
	  if (j == 0)
	    {
	      src = vec_oprnd1
		= vect_get_vec_def_for_operand (op, stmt);
	      op = vec_oprnd0
		= vect_get_vec_def_for_operand (gs_info.offset, stmt);
	    }
	  else if (modifier != NONE && (j & 1))
	    {
	      if (modifier == WIDEN)
		{
		  src = vec_oprnd1
		    = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
		  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
					     stmt, gsi);
		}
	      else if (modifier == NARROW)
		{
		  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
					      stmt, gsi);
		  op = vec_oprnd0
		    = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
						      vec_oprnd0);
		}
	      else
		gcc_unreachable ();
	    }
	  else
	    {
	      src = vec_oprnd1
		= vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
	      op = vec_oprnd0
		= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
						  vec_oprnd0);
	    }

	  if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
	    {
	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
				    TYPE_VECTOR_SUBPARTS (srctype)));
	      var = vect_get_new_ssa_name (srctype, vect_simple_var);
	      src = build1 (VIEW_CONVERT_EXPR, srctype, src);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      src = var;
	    }

	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
	    {
	      gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
				    TYPE_VECTOR_SUBPARTS (idxtype)));
	      var = vect_get_new_ssa_name (idxtype, vect_simple_var);
	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
	      op = var;
	    }

	  new_stmt
	    = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);

	  vect_finish_stmt_generation (stmt, new_stmt, gsi);

	  if (prev_stmt_info == NULL)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
      return true;
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
    }

  if (grouped_store)
    {
      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
	 reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
	  && !slp)
	{
	  *vec_stmt = NULL;
	  return true;
	}

      if (slp)
	{
	  grouped_store = false;
	  /* VEC_NUM is the number of vect stmts to be created for this
	     group.  */
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt))
		      == first_stmt);
	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
	  op = vect_get_store_rhs (first_stmt);
	}
      else
	/* VEC_NUM is the number of vect stmts to be created for this
	   group.  */
	vec_num = group_size;

      ref_type = get_group_alias_ptr_type (first_stmt);
    }
  else
    ref_type = reference_alias_ptr_type (DR_REF (first_dr));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform store. ncopies = %d\n", ncopies);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      tree stride_base, stride_step, alias_off;
      tree vec_oprnd;
      unsigned int g;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();

      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop_p (loop, stmt));

      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
			 convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     vectemp = ...;
	     tmp1 = vectemp[0];
	     array[j] = tmp1;
	     tmp2 = vectemp[1];
	     array[j + stride] = tmp2;
	     ...
	 */
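      /* As a concrete illustration (a sketch only, not taken from any
	 particular testcase): with a four-element vectype
	 (const_nunits == 4) and stride 3, the scalar loop

	   for (i = 0; i < n; i += 3)
	     array[i] = x;

	 is rewritten along the lines of

	   for (j = 0; ; j += 4 * 3)
	     {
	       vectemp = ...;
	       array[j] = vectemp[0];
	       array[j + 3] = vectemp[1];
	       array[j + 6] = vectemp[2];
	       array[j + 9] = vectemp[3];
	     }

	 with the extracts and stores produced by the NSTORES loop
	 further below.  */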

      unsigned nstores = const_nunits;
      unsigned lnel = 1;
      tree ltype = elem_type;
      tree lvectype = vectype;
      if (slp)
	{
	  if (group_size < const_nunits
	      && const_nunits % group_size == 0)
	    {
	      nstores = const_nunits / group_size;
	      lnel = group_size;
	      ltype = build_vector_type (elem_type, group_size);
	      lvectype = vectype;

	      /* First check if vec_extract optab doesn't support extraction
		 of vector elts directly.  */
	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
	      machine_mode vmode;
	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
		  || !VECTOR_MODE_P (vmode)
		  || !targetm.vector_mode_supported_p (vmode)
		  || (convert_optab_handler (vec_extract_optab,
					     TYPE_MODE (vectype), vmode)
		      == CODE_FOR_nothing))
		{
		  /* Try to avoid emitting an extract of vector elements
		     by performing the extracts using an integer type of the
		     same size, extracting from a vector of those and then
		     re-interpreting it as the original vector type if
		     supported.  */
		  unsigned lsize
		    = group_size * GET_MODE_BITSIZE (elmode);
		  elmode = int_mode_for_size (lsize, 0).require ();
		  unsigned int lnunits = const_nunits / group_size;
		  /* If we can't construct such a vector fall back to
		     element extracts from the original vector type and
		     element size stores.  */
		  if (mode_for_vector (elmode, lnunits).exists (&vmode)
		      && VECTOR_MODE_P (vmode)
		      && targetm.vector_mode_supported_p (vmode)
		      && (convert_optab_handler (vec_extract_optab,
						 vmode, elmode)
			  != CODE_FOR_nothing))
		    {
		      nstores = lnunits;
		      lnel = group_size;
		      ltype = build_nonstandard_integer_type (lsize, 1);
		      lvectype = build_vector_type (ltype, nstores);
		    }
		  /* Else fall back to vector extraction anyway.
		     Fewer stores are more important than avoiding spilling
		     of the vector we extract from.  Compared to the
		     construction case in vectorizable_load no store-forwarding
		     issue exists here for reasonable archs.  */
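		  /* For example (hypothetical numbers, purely illustrative):
		     storing groups of 4 QImode elements from a 16-element
		     vector gives LSIZE == 32, so the loop below extracts
		     SImode elements from an SImode-element view of the
		     vector: one SImode store replaces four QImode stores.  */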
		}
	    }
	  else if (group_size >= const_nunits
		   && group_size % const_nunits == 0)
	    {
	      nstores = 1;
	      lnel = const_nunits;
	      ltype = vectype;
	      lvectype = vectype;
	    }
	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	}

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
			    build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
      create_iv (stride_base, ivstep, NULL,
		 loop, &incr_gsi, insert_after,
		 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);

      prev_stmt_info = NULL;
      alias_off = build_int_cst (ref_type, 0);
      next_stmt = first_stmt;
      for (g = 0; g < group_size; g++)
	{
	  running_off = offvar;
	  if (g)
	    {
	      tree size = TYPE_SIZE_UNIT (ltype);
	      tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
				      size);
	      tree newoff = copy_ssa_name (running_off, NULL);
	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
					  running_off, pos);
	      vect_finish_stmt_generation (stmt, incr, gsi);
	      running_off = newoff;
	    }
	  unsigned int group_el = 0;
	  unsigned HOST_WIDE_INT
	    elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
	  for (j = 0; j < ncopies; j++)
	    {
	      /* We've set OP and RHS_DT above, from vect_get_store_rhs,
		 and first_stmt == stmt.  */
	      if (j == 0)
		{
		  if (slp)
		    {
		      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
					 slp_node);
		      vec_oprnd = vec_oprnds[0];
		    }
		  else
		    {
		      op = vect_get_store_rhs (next_stmt);
		      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
		    }
		}
	      else
		{
		  if (slp)
		    vec_oprnd = vec_oprnds[j];
		  else
		    {
		      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
		      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
								  vec_oprnd);
		    }
		}
	      /* Pun the vector to extract from if necessary.  */
	      if (lvectype != vectype)
		{
		  tree tem = make_ssa_name (lvectype);
		  gimple *pun
		    = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
							lvectype, vec_oprnd));
		  vect_finish_stmt_generation (stmt, pun, gsi);
		  vec_oprnd = tem;
		}
	      for (i = 0; i < nstores; i++)
		{
		  tree newref, newoff;
		  gimple *incr, *assign;
		  tree size = TYPE_SIZE (ltype);
		  /* Extract the i'th component.  */
		  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
					  bitsize_int (i), size);
		  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
					   size, pos);

		  elem = force_gimple_operand_gsi (gsi, elem, true,
						   NULL_TREE, true,
						   GSI_SAME_STMT);

		  tree this_off = build_int_cst (TREE_TYPE (alias_off),
						 group_el * elsz);
		  newref = build2 (MEM_REF, ltype,
				   running_off, this_off);

		  /* And store it to *running_off.  */
		  assign = gimple_build_assign (newref, elem);
		  vect_finish_stmt_generation (stmt, assign, gsi);

		  group_el += lnel;
		  if (! slp
		      || group_el == group_size)
		    {
		      newoff = copy_ssa_name (running_off, NULL);
		      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
						  running_off, stride_step);
		      vect_finish_stmt_generation (stmt, incr, gsi);

		      running_off = newoff;
		      group_el = 0;
		    }
		  if (g == group_size - 1
		      && !slp)
		    {
		      if (j == 0 && i == 0)
			STMT_VINFO_VEC_STMT (stmt_info)
			  = *vec_stmt = assign;
		      else
			STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
		      prev_stmt_info = vinfo_for_stmt (assign);
		    }
		}
	    }
	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	  if (slp)
	    break;
	}

      vec_oprnds.release ();
      return true;
    }

  auto_vec<tree> dr_chain (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  vec_loop_masks *loop_masks
    = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
       ? &LOOP_VINFO_MASKS (loop_vinfo)
       : NULL);
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	       && !mask
	       && !loop_masks)
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  if (memory_access_type == VMAT_CONTIGUOUS_DOWN
      || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  tree bump;
  tree vec_offset = NULL_TREE;
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
  else if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      aggr_type = elem_type;
      vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
				       &bump, &vec_offset);
    }
  else
    {
      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
      else
	aggr_type = vectype;
      bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
    }

  if (mask)
    LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */
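  /* For instance (an illustrative sketch): with VF == 16 and a
     four-element vectype, NCOPIES is 16/4 == 4, so the loop below emits
     four vector stores - and, for a grouped access, four rounds of
     permutation statements - per scalar store in the group.  */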

  prev_stmt_info = NULL;
  tree vec_mask = NULL_TREE;
  for (j = 0; j < ncopies; j++)
    {

      if (j == 0)
	{
	  if (slp)
	    {
	      /* Get vectorized arguments for SLP_NODE.  */
	      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
				 NULL, slp_node);

	      vec_oprnd = vec_oprnds[0];
	    }
	  else
	    {
	      /* For interleaved stores we collect vectorized defs for all the
		 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
		 used as an input to vect_permute_store_chain(), and OPRNDS as
		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
		 OPRNDS are of size 1.  */
	      next_stmt = first_stmt;
	      for (i = 0; i < group_size; i++)
		{
		  /* Since gaps are not supported for interleaved stores,
		     GROUP_SIZE is the exact number of stmts in the chain.
		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
		     there is no interleaving, GROUP_SIZE is 1, and only one
		     iteration of the loop will be executed.  */
		  op = vect_get_store_rhs (next_stmt);
		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
		  dr_chain.quick_push (vec_oprnd);
		  oprnds.quick_push (vec_oprnd);
		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
		}
	      if (mask)
		vec_mask = vect_get_vec_def_for_operand (mask, stmt,
							 mask_vectype);
	    }

	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
	  bool simd_lane_access_p
	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
	  if (simd_lane_access_p
	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
	      && integer_zerop (DR_OFFSET (first_dr))
	      && integer_zerop (DR_INIT (first_dr))
	      && alias_sets_conflict_p (get_alias_set (aggr_type),
					get_alias_set (TREE_TYPE (ref_type))))
	    {
	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
	      dataref_offset = build_int_cst (ref_type, 0);
	      inv_p = false;
	    }
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    {
	      vect_get_gather_scatter_ops (loop, stmt, &gs_info,
					   &dataref_ptr, &vec_offset);
	      inv_p = false;
	    }
	  else
	    dataref_ptr
	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
					  simd_lane_access_p ? loop : NULL,
					  offset, &dummy, gsi, &ptr_incr,
					  simd_lane_access_p, &inv_p,
					  NULL_TREE, bump);
	  gcc_assert (bb_vinfo || !inv_p);
	}
      else
	{
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
	     next copy.
	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
	  for (i = 0; i < group_size; i++)
	    {
	      op = oprnds[i];
	      vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
	      vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
	      dr_chain[i] = vec_oprnd;
	      oprnds[i] = vec_oprnd;
	    }
	  if (mask)
	    vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
	  if (dataref_offset)
	    dataref_offset
	      = int_const_binop (PLUS_EXPR, dataref_offset, bump);
	  else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	    vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
							 vec_offset);
	  else
	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					   bump);
	}

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	{
	  tree vec_array;

	  /* Combine all the vectors into an array.  */
	  vec_array = create_vector_array (vectype, vec_num);
	  for (i = 0; i < vec_num; i++)
	    {
	      vec_oprnd = dr_chain[i];
	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
	    }

	  tree final_mask = NULL;
	  if (loop_masks)
	    final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
					     vectype, j);
	  if (vec_mask)
	    final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						  vec_mask, gsi);

	  gcall *call;
	  if (final_mask)
	    {
	      /* Emit:
		   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
				     VEC_ARRAY).  */
	      unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
	      tree alias_ptr = build_int_cst (ref_type, align);
	      call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
						 dataref_ptr, alias_ptr,
						 final_mask, vec_array);
	    }
	  else
	    {
	      /* Emit:
		   MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
	      data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
	      call = gimple_build_call_internal (IFN_STORE_LANES, 1,
						 vec_array);
	      gimple_call_set_lhs (call, data_ref);
	    }
	  gimple_call_set_nothrow (call, true);
	  new_stmt = call;
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	}
      else
	{
	  new_stmt = NULL;
	  if (grouped_store)
	    {
	      if (j == 0)
		result_chain.create (group_size);
	      /* Permute.  */
	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
					&result_chain);
	    }

	  next_stmt = first_stmt;
	  for (i = 0; i < vec_num; i++)
	    {
	      unsigned align, misalign;

	      tree final_mask = NULL_TREE;
	      if (loop_masks)
		final_mask = vect_get_loop_mask (gsi, loop_masks,
						 vec_num * ncopies,
						 vectype, vec_num * j + i);
	      if (vec_mask)
		final_mask = prepare_load_store_mask (mask_vectype, final_mask,
						      vec_mask, gsi);

	      if (memory_access_type == VMAT_GATHER_SCATTER)
		{
		  tree scale = size_int (gs_info.scale);
		  gcall *call;
		  if (loop_masks)
		    call = gimple_build_call_internal
		      (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
		       scale, vec_oprnd, final_mask);
		  else
		    call = gimple_build_call_internal
		      (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
		       scale, vec_oprnd);
		  gimple_call_set_nothrow (call, true);
		  new_stmt = call;
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
		  break;
		}

	      if (i > 0)
		/* Bump the vector pointer.  */
		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
					       stmt, bump);

	      if (slp)
		vec_oprnd = vec_oprnds[i];
	      else if (grouped_store)
		/* For grouped stores vectorized defs are interleaved in
		   vect_permute_store_chain().  */
		vec_oprnd = result_chain[i];

	      align = DR_TARGET_ALIGNMENT (first_dr);
	      if (aligned_access_p (first_dr))
		misalign = 0;
	      else if (DR_MISALIGNMENT (first_dr) == -1)
		{
		  align = dr_alignment (vect_dr_behavior (first_dr));
		  misalign = 0;
		}
	      else
		misalign = DR_MISALIGNMENT (first_dr);
	      if (dataref_offset == NULL_TREE
		  && TREE_CODE (dataref_ptr) == SSA_NAME)
		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
					misalign);

	      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
		{
		  tree perm_mask = perm_mask_for_reverse (vectype);
		  tree perm_dest
		    = vect_create_destination_var (vect_get_store_rhs (stmt),
						   vectype);
		  tree new_temp = make_ssa_name (perm_dest);

		  /* Generate the permute statement.  */
		  gimple *perm_stmt
		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
					   vec_oprnd, perm_mask);
		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
		  vec_oprnd = new_temp;
		}

	      /* Arguments are ready.  Create the new vector stmt.  */
	      if (final_mask)
		{
		  align = least_bit_hwi (misalign | align);
		  tree ptr = build_int_cst (ref_type, align);
		  gcall *call
		    = gimple_build_call_internal (IFN_MASK_STORE, 4,
						  dataref_ptr, ptr,
						  final_mask, vec_oprnd);
		  gimple_call_set_nothrow (call, true);
		  new_stmt = call;
		}
	      else
		{
		  data_ref = fold_build2 (MEM_REF, vectype,
					  dataref_ptr,
					  dataref_offset
					  ? dataref_offset
					  : build_int_cst (ref_type, 0));
		  if (aligned_access_p (first_dr))
		    ;
		  else if (DR_MISALIGNMENT (first_dr) == -1)
		    TREE_TYPE (data_ref)
		      = build_aligned_type (TREE_TYPE (data_ref),
					    align * BITS_PER_UNIT);
		  else
		    TREE_TYPE (data_ref)
		      = build_aligned_type (TREE_TYPE (data_ref),
					    TYPE_ALIGN (elem_type));
		  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
		}
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      if (slp)
		continue;

	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	      if (!next_stmt)
		break;
	    }
	}
      if (!slp)
	{
	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	}
    }

  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
{
  tree mask_type;

  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));

  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
{
  gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
  return vect_gen_perm_mask_any (vectype, sel);
}

/* Given vector variables X and Y, that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  tree scalar_dest = gimple_get_lhs (stmt);
  if (TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple *stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases when there are more complex use webs
	     we don't have an easy way to preserve stmt order to fulfil
	     dependencies within them.  */
	  tree op2;
	  ssa_op_iter i2;
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	    return false;
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	    {
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
		return false;
	    }
	  any = true;
	}
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
	}
    }

  return true;
}

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  gimple *new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int i, j;
  unsigned int group_size;
  poly_uint64 group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  gimple *first_stmt;
  gimple *first_stmt_for_drptr = NULL;
  bool inv_p;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  poly_uint64 vf;
  tree aggr_type;
  gather_scatter_info gs_info;
  vec_info *vinfo = stmt_info->vinfo;
  tree ref_type;
  enum vect_def_type mask_dt = vect_unknown_def_type;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  if (is_gimple_assign (stmt))
    {
      scalar_dest = gimple_assign_lhs (stmt);
      if (TREE_CODE (scalar_dest) != SSA_NAME)
	return false;

      tree_code code = gimple_assign_rhs_code (stmt);
      if (code != ARRAY_REF
	  && code != BIT_FIELD_REF
	  && code != INDIRECT_REF
	  && code != COMPONENT_REF
	  && code != IMAGPART_EXPR
	  && code != REALPART_EXPR
	  && code != MEM_REF
	  && TREE_CODE_CLASS (code) != tcc_declaration)
	return false;
    }
  else
    {
      gcall *call = dyn_cast <gcall *> (stmt);
      if (!call || !gimple_call_internal_p (call))
	return false;

      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_load_fn_p (ifn))
	return false;

      scalar_dest = gimple_call_lhs (call);
      if (!scalar_dest)
	return false;

      if (slp_node != NULL)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "SLP of masked loads not supported.\n");
	  return false;
	}

      int mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0)
	{
	  mask = gimple_call_arg (call, mask_index);
	  if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
					   &mask_vectype))
	    return false;
	}
    }

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot perform implicit CSE when unrolling "
			 "with negative dependence distance\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop);
      gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	slp_perm = true;

      /* Invalidate assumptions made by dependence analysis when vectorization
	 on the unrolled body effectively re-orders stmts.  */
      if (!PURE_SLP_STMT (stmt_info)
	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot perform implicit CSE when performing "
			     "group loads with negative dependence distance\n");
	  return false;
	}

      /* Similarly when the stmt is a load that is both part of a SLP
	 instance and a loop vectorized stmt via the same-dr mechanism
	 we have to give up.  */
      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
	  && (STMT_SLP_TYPE (stmt_info)
	      != STMT_SLP_TYPE (vinfo_for_stmt
				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "conflicting SLP types for CSEd load\n");
	  return false;
	}
    }
  else
    group_size = 1;

  vect_memory_access_type memory_access_type;
  if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
			    &memory_access_type, &gs_info))
    return false;

  if (mask)
    {
      if (memory_access_type == VMAT_CONTIGUOUS)
	{
	  machine_mode vec_mode = TYPE_MODE (vectype);
	  if (!VECTOR_MODE_P (vec_mode)
	      || !can_vec_mask_load_store_p (vec_mode,
					     TYPE_MODE (mask_vectype), true))
	    return false;
	}
      else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
	{
	  tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
	  tree masktype
	    = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
	  if (TREE_CODE (masktype) == INTEGER_TYPE)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "masked gather with integer mask not"
				 " supported.");
	      return false;
	    }
	}
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
	       && memory_access_type != VMAT_GATHER_SCATTER)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported access type for masked load.\n");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (!slp)
	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;

      if (loop_vinfo
	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
				  memory_access_type, &gs_info);

      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (! slp_node)
	vect_model_load_cost (stmt_info, ncopies, memory_access_type,
			      NULL, NULL, NULL);
      return true;
    }

  if (!slp)
    gcc_assert (memory_access_type
		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform load. ncopies = %d\n", ncopies);

  /* Transform.  */

  ensure_base_align (dr);

  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
    {
      vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
				    mask_dt);
      return true;
    }

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      tree stride_base, stride_step, alias_off;
      /* Checked by get_load_store_type.  */
      unsigned int const_nunits = nunits.to_constant ();
      unsigned HOST_WIDE_INT cst_offset = 0;

      gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
      gcc_assert (!nested_in_vect_loop);

      if (grouped_load)
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
	}
      else
	{
	  first_stmt = stmt;
	  first_dr = dr;
	}
      if (slp && grouped_load)
	{
	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
	  ref_type = get_group_alias_ptr_type (first_stmt);
	}
      else
	{
	  if (grouped_load)
	    cst_offset
	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
		 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
	  group_size = 1;
	  ref_type = reference_alias_ptr_type (DR_REF (dr));
	}

      stride_base
	= fold_build_pointer_plus
	    (DR_BASE_ADDRESS (first_dr),
	     size_binop (PLUS_EXPR,
			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
			 convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	 */
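      /* As a concrete illustration (a sketch only): with a four-element
	 vectype and stride 5, each copy of the vector loop performs

	   tmp1 = array[j];
	   tmp2 = array[j + 5];
	   tmp3 = array[j + 10];
	   tmp4 = array[j + 15];
	   vectemp = {tmp1, tmp2, tmp3, tmp4};

	 built by the NLOADS element loads and the CONSTRUCTOR below.  */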
7556
ab313a8c
RB
7557 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7558 build_int_cst (TREE_TYPE (stride_step), vf));
7d75abc8
MM
7559
7560 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7561
b210f45f
RB
7562 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7563 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7564 create_iv (stride_base, ivstep, NULL,
7d75abc8
MM
7565 loop, &incr_gsi, insert_after,
7566 &offvar, NULL);
7567 incr = gsi_stmt (incr_gsi);
310213d4 7568 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 7569
b210f45f 7570 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7d75abc8
MM
7571
7572 prev_stmt_info = NULL;
7573 running_off = offvar;
44fc7854 7574 alias_off = build_int_cst (ref_type, 0);
4d694b27 7575 int nloads = const_nunits;
e09b4c37 7576 int lnel = 1;
7b5fc413 7577 tree ltype = TREE_TYPE (vectype);
ea60dd34 7578 tree lvectype = vectype;
b266b968 7579 auto_vec<tree> dr_chain;
2de001ee 7580 if (memory_access_type == VMAT_STRIDED_SLP)
7b5fc413 7581 {
4d694b27 7582 if (group_size < const_nunits)
e09b4c37 7583 {
ff03930a
JJ
7584 /* First check if vec_init optab supports construction from
7585 vector elts directly. */
b397965c 7586 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
9da15d40
RS
7587 machine_mode vmode;
7588 if (mode_for_vector (elmode, group_size).exists (&vmode)
7589 && VECTOR_MODE_P (vmode)
414fef4e 7590 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7591 && (convert_optab_handler (vec_init_optab,
7592 TYPE_MODE (vectype), vmode)
7593 != CODE_FOR_nothing))
ea60dd34 7594 {
4d694b27 7595 nloads = const_nunits / group_size;
ea60dd34 7596 lnel = group_size;
ff03930a
JJ
7597 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7598 }
7599 else
7600 {
7601 /* Otherwise avoid emitting a constructor of vector elements
7602 by performing the loads using an integer type of the same
7603 size, constructing a vector of those and then
7604 re-interpreting it as the original vector type.
7605 This avoids a huge runtime penalty due to the general
7606 inability to perform store forwarding from smaller stores
7607 to a larger load. */
7608 unsigned lsize
7609 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
fffbab82 7610 elmode = int_mode_for_size (lsize, 0).require ();
4d694b27 7611 unsigned int lnunits = const_nunits / group_size;
ff03930a
JJ
7612 /* If we can't construct such a vector fall back to
7613 element loads of the original vector type. */
4d694b27 7614 if (mode_for_vector (elmode, lnunits).exists (&vmode)
9da15d40 7615 && VECTOR_MODE_P (vmode)
414fef4e 7616 && targetm.vector_mode_supported_p (vmode)
ff03930a
JJ
7617 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7618 != CODE_FOR_nothing))
7619 {
4d694b27 7620 nloads = lnunits;
ff03930a
JJ
7621 lnel = group_size;
7622 ltype = build_nonstandard_integer_type (lsize, 1);
7623 lvectype = build_vector_type (ltype, nloads);
7624 }
ea60dd34 7625 }
e09b4c37 7626 }
2de001ee 7627 else
e09b4c37 7628 {
ea60dd34 7629 nloads = 1;
4d694b27 7630 lnel = const_nunits;
e09b4c37 7631 ltype = vectype;
e09b4c37 7632 }
2de001ee
RS
7633 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7634 }
7635 if (slp)
7636 {
66c16fd9
RB
7637 /* For SLP permutation support we need to load the whole group,
7638 not only the number of vector stmts the permutation result
7639 fits in. */
b266b968 7640 if (slp_perm)
66c16fd9 7641 {
d9f21f6a
RS
7642 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7643 variable VF. */
7644 unsigned int const_vf = vf.to_constant ();
4d694b27 7645 ncopies = CEIL (group_size * const_vf, const_nunits);
66c16fd9
RB
7646 dr_chain.create (ncopies);
7647 }
7648 else
7649 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 7650 }
4d694b27 7651 unsigned int group_el = 0;
e09b4c37
RB
7652 unsigned HOST_WIDE_INT
7653 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7d75abc8
MM
7654 for (j = 0; j < ncopies; j++)
7655 {
7b5fc413 7656 if (nloads > 1)
e09b4c37
RB
7657 vec_alloc (v, nloads);
7658 for (i = 0; i < nloads; i++)
7b5fc413 7659 {
e09b4c37 7660 tree this_off = build_int_cst (TREE_TYPE (alias_off),
b210f45f 7661 group_el * elsz + cst_offset);
e09b4c37
RB
7662 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7663 build2 (MEM_REF, ltype,
7664 running_off, this_off));
7665 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7666 if (nloads > 1)
7667 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7668 gimple_assign_lhs (new_stmt));
7669
7670 group_el += lnel;
7671 if (! slp
7672 || group_el == group_size)
7b5fc413 7673 {
e09b4c37
RB
7674 tree newoff = copy_ssa_name (running_off);
7675 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7676 running_off, stride_step);
7b5fc413
RB
7677 vect_finish_stmt_generation (stmt, incr, gsi);
7678
7679 running_off = newoff;
e09b4c37 7680 group_el = 0;
7b5fc413 7681 }
7b5fc413 7682 }
e09b4c37 7683 if (nloads > 1)
7d75abc8 7684 {
ea60dd34
RB
7685 tree vec_inv = build_constructor (lvectype, v);
7686 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
e09b4c37 7687 new_stmt = SSA_NAME_DEF_STMT (new_temp);
ea60dd34
RB
7688 if (lvectype != vectype)
7689 {
7690 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7691 VIEW_CONVERT_EXPR,
7692 build1 (VIEW_CONVERT_EXPR,
7693 vectype, new_temp));
7694 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7695 }
7d75abc8
MM
7696 }
7697
7b5fc413 7698 if (slp)
b266b968 7699 {
b266b968
RB
7700 if (slp_perm)
7701 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
66c16fd9
RB
7702 else
7703 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 7704 }
7d75abc8 7705 else
225ce44b
RB
7706 {
7707 if (j == 0)
7708 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7709 else
7710 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7711 prev_stmt_info = vinfo_for_stmt (new_stmt);
7712 }
7d75abc8 7713 }
b266b968 7714 if (slp_perm)
29afecdf
RB
7715 {
7716 unsigned n_perms;
7717 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7718 slp_node_instance, false, &n_perms);
7719 }
7d75abc8
MM
7720 return true;
7721 }
aec7ae7d 7722
b5ec4de7
RS
7723 if (memory_access_type == VMAT_GATHER_SCATTER
7724 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
ab2fc782
RS
7725 grouped_load = false;
7726
0d0293ac 7727 if (grouped_load)
ebfd146a 7728 {
e14c1050 7729 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
44fc7854 7730 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4f0a0218 7731 /* For SLP vectorization we directly vectorize a subchain
52eab378
RB
7732 without permutation. */
7733 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4f0a0218
RB
7734 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7735 /* For BB vectorization always use the first stmt to base
7736 the data ref pointer on. */
7737 if (bb_vinfo)
7738 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 7739
ebfd146a 7740 /* Check if the chain of loads is already vectorized. */
01d8bf07
RB
7741 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7742 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7743 ??? But we can only do so if there is exactly one
7744 as we have no way to get at the rest. Leave the CSE
7745 opportunity alone.
7746 ??? With the group load eventually participating
7747 in multiple different permutations (having multiple
7748 slp nodes which refer to the same group) the CSE
7749 is even wrong code. See PR56270. */
7750 && !slp)
ebfd146a
IR
7751 {
7752 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7753 return true;
7754 }
7755 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
9b999e8c 7756 group_gap_adj = 0;
ebfd146a
IR
7757
7758 /* VEC_NUM is the number of vect stmts to be created for this group. */
7759 if (slp)
7760 {
0d0293ac 7761 grouped_load = false;
91ff1504
RB
7762 /* For SLP permutation support we need to load the whole group,
7763 not only the number of vector stmts the permutation result
7764 fits in. */
7765 if (slp_perm)
b267968e 7766 {
d9f21f6a
RS
7767 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7768 variable VF. */
7769 unsigned int const_vf = vf.to_constant ();
4d694b27
RS
7770 unsigned int const_nunits = nunits.to_constant ();
7771 vec_num = CEIL (group_size * const_vf, const_nunits);
b267968e
RB
7772 group_gap_adj = vf * group_size - nunits * vec_num;
7773 }
91ff1504 7774 else
b267968e
RB
7775 {
7776 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
796bd467
RB
7777 group_gap_adj
7778 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
b267968e 7779 }
a70d6342 7780 }
ebfd146a 7781 else
9b999e8c 7782 vec_num = group_size;
44fc7854
BE
7783
7784 ref_type = get_group_alias_ptr_type (first_stmt);
ebfd146a
IR
7785 }
7786 else
7787 {
7788 first_stmt = stmt;
7789 first_dr = dr;
7790 group_size = vec_num = 1;
9b999e8c 7791 group_gap_adj = 0;
44fc7854 7792 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
ebfd146a
IR
7793 }
7794
720f5239 7795 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 7796 gcc_assert (alignment_support_scheme);
70088b95
RS
7797 vec_loop_masks *loop_masks
7798 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7799 ? &LOOP_VINFO_MASKS (loop_vinfo)
7800 : NULL);
7cfb4d93
RS
7801 /* Targets with store-lane instructions must not require explicit
7802 realignment. vect_supportable_dr_alignment always returns either
7803 dr_aligned or dr_unaligned_supported for masked operations. */
7804 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7805 && !mask
70088b95 7806 && !loop_masks)
272c6793
RS
7807 || alignment_support_scheme == dr_aligned
7808 || alignment_support_scheme == dr_unaligned_supported);
ebfd146a
IR
7809
7810 /* In case the vectorization factor (VF) is bigger than the number
7811 of elements that we can fit in a vectype (nunits), we have to generate
7812 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 7813 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 7814 from one copy of the vector stmt to the next, in the field
ff802fa1 7815 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 7816 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
7817 stmts that use the defs of the current stmt. The example below
7818 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7819 need to create 4 vectorized stmts):
ebfd146a
IR
7820
7821 before vectorization:
7822 RELATED_STMT VEC_STMT
7823 S1: x = memref - -
7824 S2: z = x + 1 - -
7825
7826 step 1: vectorize stmt S1:
7827 We first create the vector stmt VS1_0, and, as usual, record a
7828 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7829 Next, we create the vector stmt VS1_1, and record a pointer to
7830 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 7831 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
ebfd146a
IR
7832 stmts and pointers:
7833 RELATED_STMT VEC_STMT
7834 VS1_0: vx0 = memref0 VS1_1 -
7835 VS1_1: vx1 = memref1 VS1_2 -
7836 VS1_2: vx2 = memref2 VS1_3 -
7837 VS1_3: vx3 = memref3 - -
7838 S1: x = load - VS1_0
7839 S2: z = x + 1 - -
7840
b8698a0f
L
7841 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7842 information recorded in the RELATED_STMT field is used to vectorize
ebfd146a
IR
7843 stmt S2. */
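   /* A minimal sketch (illustration only, not part of the original
      sources) of how the chain recorded above can be walked, using the
      STMT_VINFO_VEC_STMT and STMT_VINFO_RELATED_STMT accessors; the
      helper name is hypothetical.  */
#if 0
static void
walk_vec_stmt_copies (stmt_vec_info stmt_info)
{
  gimple *copy = STMT_VINFO_VEC_STMT (stmt_info);  /* VS1_0 above.  */
  while (copy)
    {
      /* ... use COPY (VS1_0, VS1_1, VS1_2, VS1_3 in turn) ...  */
      copy = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (copy));
    }
}
#endif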
7844
0d0293ac 7845 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
7846
7847 S1: x2 = &base + 2
7848 S2: x0 = &base
7849 S3: x1 = &base + 1
7850 S4: x3 = &base + 3
7851
b8698a0f 7852 Vectorized loads are created in the order of memory accesses
ebfd146a
IR
7853 starting from the access of the first stmt of the chain:
7854
7855 VS1: vx0 = &base
7856 VS2: vx1 = &base + vec_size*1
7857 VS3: vx2 = &base + vec_size*2
7858 VS4: vx3 = &base + vec_size*3
7859
7860 Then permutation statements are generated:
7861
e2c83630
RH
7862 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7863 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
ebfd146a
IR
7864 ...
7865
7866 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7867 (the order of the data-refs in the output of vect_permute_load_chain
7868 corresponds to the order of scalar stmts in the interleaving chain - see
7869 the documentation of vect_permute_load_chain()).
7870 The generation of permutation stmts and recording them in
0d0293ac 7871 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 7872
b8698a0f 7873 In case of both multiple types and interleaving, the vector loads and
ff802fa1
IR
7874 permutation stmts above are created for every copy. The result vector
7875 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7876 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
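   /* A concrete instance of the above (illustration only): for two
      interleaved streams a[] and b[] with nunits == 4, the loads yield

        vx0 = { a0, b0, a1, b1 }    vx1 = { a2, b2, a3, b3 }

      and the permutation stmts deinterleave them:

        va = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  = { a0, a1, a2, a3 }
        vb = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  = { b0, b1, b2, b3 }  */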
ebfd146a
IR
7877
7878 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7879 on a target that supports unaligned accesses (dr_unaligned_supported)
7880 we generate the following code:
7881 p = initial_addr;
7882 indx = 0;
7883 loop {
7884 p = p + indx * vectype_size;
7885 vec_dest = *(p);
7886 indx = indx + 1;
7887 }
7888
7889 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 7890 does not support unaligned accesses (dr_explicit_realign_optimized) -
ebfd146a
IR
7891 then generate the following code, in which the data in each iteration is
7892 obtained by two vector loads, one from the previous iteration, and one
7893 from the current iteration:
7894 p1 = initial_addr;
7895 msq_init = *(floor(p1))
7896 p2 = initial_addr + VS - 1;
7897 realignment_token = call target_builtin;
7898 indx = 0;
7899 loop {
7900 p2 = p2 + indx * vectype_size
7901 lsq = *(floor(p2))
7902 vec_dest = realign_load (msq, lsq, realignment_token)
7903 indx = indx + 1;
7904 msq = lsq;
7905 } */
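   /* Above, floor (p) stands for rounding the address P down to a
      vector-size boundary.  A scalar model of that operation
      (illustration only, assuming vec_size is a power of two and
      <stdint.h> for uintptr_t):  */
#if 0
static uintptr_t
floor_addr (uintptr_t p, uintptr_t vec_size)
{
  /* Clear the low bits of P, yielding the largest multiple of
     VEC_SIZE that is <= P.  */
  return p & -vec_size;
}
#endif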
7906
7907 /* If the misalignment remains the same throughout the execution of the
7908 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7909 preheader. Otherwise, they need to be created inside the loop.
ebfd146a
IR
7910 This can only occur when vectorizing memory accesses in the inner-loop
7911 nested within an outer-loop that is being vectorized. */
7912
d1e4b493 7913 if (nested_in_vect_loop
cf098191
RS
7914 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7915 GET_MODE_SIZE (TYPE_MODE (vectype))))
ebfd146a
IR
7916 {
7917 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7918 compute_in_loop = true;
7919 }
7920
7921 if ((alignment_support_scheme == dr_explicit_realign_optimized
7922 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7923 && !compute_in_loop)
ebfd146a
IR
7924 {
7925 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7926 alignment_support_scheme, NULL_TREE,
7927 &at_loop);
7928 if (alignment_support_scheme == dr_explicit_realign_optimized)
7929 {
538dd0b7 7930 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
356bbc4c
JJ
7931 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7932 size_one_node);
ebfd146a
IR
7933 }
7934 }
7935 else
7936 at_loop = loop;
7937
62da9e14 7938 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
a1e53f3f
L
7939 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7940
ab2fc782
RS
7941 tree bump;
7942 tree vec_offset = NULL_TREE;
7943 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7944 {
7945 aggr_type = NULL_TREE;
7946 bump = NULL_TREE;
7947 }
7948 else if (memory_access_type == VMAT_GATHER_SCATTER)
7949 {
7950 aggr_type = elem_type;
7951 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7952 &bump, &vec_offset);
7953 }
272c6793 7954 else
ab2fc782
RS
7955 {
7956 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7957 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7958 else
7959 aggr_type = vectype;
7960 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
7961 }
272c6793 7962
c3a8f964 7963 tree vec_mask = NULL_TREE;
ebfd146a 7964 prev_stmt_info = NULL;
4d694b27 7965 poly_uint64 group_elt = 0;
ebfd146a 7966 for (j = 0; j < ncopies; j++)
b8698a0f 7967 {
272c6793 7968 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7969 if (j == 0)
74bf76ed
JJ
7970 {
7971 bool simd_lane_access_p
7972 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7973 if (simd_lane_access_p
7974 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7975 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7976 && integer_zerop (DR_OFFSET (first_dr))
7977 && integer_zerop (DR_INIT (first_dr))
7978 && alias_sets_conflict_p (get_alias_set (aggr_type),
44fc7854 7979 get_alias_set (TREE_TYPE (ref_type)))
74bf76ed
JJ
7980 && (alignment_support_scheme == dr_aligned
7981 || alignment_support_scheme == dr_unaligned_supported))
7982 {
7983 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
44fc7854 7984 dataref_offset = build_int_cst (ref_type, 0);
8928eff3 7985 inv_p = false;
74bf76ed 7986 }
4f0a0218
RB
7987 else if (first_stmt_for_drptr
7988 && first_stmt != first_stmt_for_drptr)
7989 {
7990 dataref_ptr
7991 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7992 at_loop, offset, &dummy, gsi,
7993 &ptr_incr, simd_lane_access_p,
ab2fc782 7994 &inv_p, byte_offset, bump);
4f0a0218
RB
7995 /* Adjust the pointer by the difference to first_stmt. */
7996 data_reference_p ptrdr
7997 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7998 tree diff = fold_convert (sizetype,
7999 size_binop (MINUS_EXPR,
8000 DR_INIT (first_dr),
8001 DR_INIT (ptrdr)));
8002 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8003 stmt, diff);
8004 }
bfaa08b7
RS
8005 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8006 {
8007 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8008 &dataref_ptr, &vec_offset);
8009 inv_p = false;
8010 }
74bf76ed
JJ
8011 else
8012 dataref_ptr
8013 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8014 offset, &dummy, gsi, &ptr_incr,
356bbc4c 8015 simd_lane_access_p, &inv_p,
ab2fc782 8016 byte_offset, bump);
c3a8f964
RS
8017 if (mask)
8018 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8019 mask_vectype);
74bf76ed 8020 }
ebfd146a 8021 else
c3a8f964
RS
8022 {
8023 if (dataref_offset)
8024 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
ab2fc782 8025 bump);
bfaa08b7 8026 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
929b4411
RS
8027 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8028 vec_offset);
c3a8f964 8029 else
ab2fc782
RS
8030 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8031 stmt, bump);
c3a8f964 8032 if (mask)
929b4411 8033 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
c3a8f964 8034 }
ebfd146a 8035
0d0293ac 8036 if (grouped_load || slp_perm)
9771b263 8037 dr_chain.create (vec_num);
5ce1ee7f 8038
2de001ee 8039 if (memory_access_type == VMAT_LOAD_STORE_LANES)
ebfd146a 8040 {
272c6793
RS
8041 tree vec_array;
8042
8043 vec_array = create_vector_array (vectype, vec_num);
8044
7cfb4d93 8045 tree final_mask = NULL_TREE;
70088b95
RS
8046 if (loop_masks)
8047 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8048 vectype, j);
7cfb4d93
RS
8049 if (vec_mask)
8050 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8051 vec_mask, gsi);
8052
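	  /* On targets such as AArch64, the (MASK_)LOAD_LANES internal
	     functions are expected to map to the ldN family of
	     instructions, which load VEC_NUM vectors and deinterleave
	     them in one step (target mapping noted for illustration
	     only).  */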
7e11fc7f 8053 gcall *call;
7cfb4d93 8054 if (final_mask)
7e11fc7f
RS
8055 {
8056 /* Emit:
8057 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8058 VEC_MASK). */
8059 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8060 tree alias_ptr = build_int_cst (ref_type, align);
8061 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8062 dataref_ptr, alias_ptr,
7cfb4d93 8063 final_mask);
7e11fc7f
RS
8064 }
8065 else
8066 {
8067 /* Emit:
8068 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8069 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8070 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8071 }
a844293d
RS
8072 gimple_call_set_lhs (call, vec_array);
8073 gimple_call_set_nothrow (call, true);
8074 new_stmt = call;
272c6793 8075 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 8076
272c6793
RS
8077 /* Extract each vector into an SSA_NAME. */
8078 for (i = 0; i < vec_num; i++)
ebfd146a 8079 {
272c6793
RS
8080 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8081 vec_array, i);
9771b263 8082 dr_chain.quick_push (new_temp);
272c6793
RS
8083 }
8084
8085 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 8086 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
8087 }
8088 else
8089 {
8090 for (i = 0; i < vec_num; i++)
8091 {
7cfb4d93 8092 tree final_mask = NULL_TREE;
70088b95 8093 if (loop_masks
7cfb4d93 8094 && memory_access_type != VMAT_INVARIANT)
70088b95
RS
8095 final_mask = vect_get_loop_mask (gsi, loop_masks,
8096 vec_num * ncopies,
7cfb4d93
RS
8097 vectype, vec_num * j + i);
8098 if (vec_mask)
8099 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8100 vec_mask, gsi);
8101
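	      /* When both masks are present, FINAL_MASK is their
	         bitwise AND: a lane of this copy is active only if the
	         loop mask for copy VEC_NUM * J + I and the user-supplied
	         VEC_MASK both select it (summary for illustration).  */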
272c6793
RS
8102 if (i > 0)
8103 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
ab2fc782 8104 stmt, bump);
272c6793
RS
8105
8106 /* 2. Create the vector-load in the loop. */
8107 switch (alignment_support_scheme)
8108 {
8109 case dr_aligned:
8110 case dr_unaligned_supported:
be1ac4ec 8111 {
644ffefd
MJ
8112 unsigned int align, misalign;
8113
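		  /* For VMAT_GATHER_SCATTER the load is emitted as an
		     internal call whose per-lane semantics are, for
		     illustration,
		       DEST[i] = *(DATAREF_PTR + VEC_OFFSET[i] * SCALE)
		     with IFN_MASK_GATHER_LOAD skipping lanes whose
		     FINAL_MASK bit is clear.  */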
bfaa08b7
RS
8114 if (memory_access_type == VMAT_GATHER_SCATTER)
8115 {
8116 tree scale = size_int (gs_info.scale);
8117 gcall *call;
70088b95 8118 if (loop_masks)
bfaa08b7
RS
8119 call = gimple_build_call_internal
8120 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8121 vec_offset, scale, final_mask);
8122 else
8123 call = gimple_build_call_internal
8124 (IFN_GATHER_LOAD, 3, dataref_ptr,
8125 vec_offset, scale);
8126 gimple_call_set_nothrow (call, true);
8127 new_stmt = call;
8128 data_ref = NULL_TREE;
8129 break;
8130 }
8131
f702e7d4 8132 align = DR_TARGET_ALIGNMENT (dr);
272c6793
RS
8133 if (alignment_support_scheme == dr_aligned)
8134 {
8135 gcc_assert (aligned_access_p (first_dr));
644ffefd 8136 misalign = 0;
272c6793
RS
8137 }
8138 else if (DR_MISALIGNMENT (first_dr) == -1)
8139 {
25f68d90 8140 align = dr_alignment (vect_dr_behavior (first_dr));
52639a61 8141 misalign = 0;
272c6793
RS
8142 }
8143 else
c3a8f964 8144 misalign = DR_MISALIGNMENT (first_dr);
aed93b23
RB
8145 if (dataref_offset == NULL_TREE
8146 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
8147 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8148 align, misalign);
c3a8f964 8149
7cfb4d93 8150 if (final_mask)
c3a8f964
RS
8151 {
8152 align = least_bit_hwi (misalign | align);
8153 tree ptr = build_int_cst (ref_type, align);
8154 gcall *call
8155 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8156 dataref_ptr, ptr,
7cfb4d93 8157 final_mask);
c3a8f964
RS
8158 gimple_call_set_nothrow (call, true);
8159 new_stmt = call;
8160 data_ref = NULL_TREE;
8161 }
8162 else
8163 {
8164 data_ref
8165 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8166 dataref_offset
8167 ? dataref_offset
8168 : build_int_cst (ref_type, 0));
8169 if (alignment_support_scheme == dr_aligned)
8170 ;
8171 else if (DR_MISALIGNMENT (first_dr) == -1)
8172 TREE_TYPE (data_ref)
8173 = build_aligned_type (TREE_TYPE (data_ref),
8174 align * BITS_PER_UNIT);
8175 else
8176 TREE_TYPE (data_ref)
8177 = build_aligned_type (TREE_TYPE (data_ref),
8178 TYPE_ALIGN (elem_type));
8179 }
272c6793 8180 break;
be1ac4ec 8181 }
272c6793 8182 case dr_explicit_realign:
267d3070 8183 {
272c6793 8184 tree ptr, bump;
272c6793 8185
d88981fc 8186 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
8187
8188 if (compute_in_loop)
8189 msq = vect_setup_realignment (first_stmt, gsi,
8190 &realignment_token,
8191 dr_explicit_realign,
8192 dataref_ptr, NULL);
8193
aed93b23
RB
8194 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8195 ptr = copy_ssa_name (dataref_ptr);
8196 else
8197 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
f702e7d4 8198 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
0d0e4a03
JJ
8199 new_stmt = gimple_build_assign
8200 (ptr, BIT_AND_EXPR, dataref_ptr,
272c6793
RS
8201 build_int_cst
8202 (TREE_TYPE (dataref_ptr),
f702e7d4 8203 -(HOST_WIDE_INT) align));
272c6793
RS
8204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8205 data_ref
8206 = build2 (MEM_REF, vectype, ptr,
44fc7854 8207 build_int_cst (ref_type, 0));
272c6793
RS
8208 vec_dest = vect_create_destination_var (scalar_dest,
8209 vectype);
8210 new_stmt = gimple_build_assign (vec_dest, data_ref);
8211 new_temp = make_ssa_name (vec_dest, new_stmt);
8212 gimple_assign_set_lhs (new_stmt, new_temp);
8213 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8214 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8216 msq = new_temp;
8217
d88981fc 8218 bump = size_binop (MULT_EXPR, vs,
7b7b1813 8219 TYPE_SIZE_UNIT (elem_type));
d88981fc 8220 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 8221 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
0d0e4a03
JJ
8222 new_stmt = gimple_build_assign
8223 (NULL_TREE, BIT_AND_EXPR, ptr,
272c6793 8224 build_int_cst
f702e7d4 8225 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
aed93b23 8226 ptr = copy_ssa_name (ptr, new_stmt);
272c6793
RS
8227 gimple_assign_set_lhs (new_stmt, ptr);
8228 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8229 data_ref
8230 = build2 (MEM_REF, vectype, ptr,
44fc7854 8231 build_int_cst (ref_type, 0));
272c6793 8232 break;
267d3070 8233 }
272c6793 8234 case dr_explicit_realign_optimized:
f702e7d4
RS
8235 {
8236 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8237 new_temp = copy_ssa_name (dataref_ptr);
8238 else
8239 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8240 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8241 new_stmt = gimple_build_assign
8242 (new_temp, BIT_AND_EXPR, dataref_ptr,
8243 build_int_cst (TREE_TYPE (dataref_ptr),
8244 -(HOST_WIDE_INT) align));
8245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8246 data_ref
8247 = build2 (MEM_REF, vectype, new_temp,
8248 build_int_cst (ref_type, 0));
8249 break;
8250 }
272c6793
RS
8251 default:
8252 gcc_unreachable ();
8253 }
ebfd146a 8254 vec_dest = vect_create_destination_var (scalar_dest, vectype);
c3a8f964
RS
8255 /* DATA_REF is null if we've already built the statement. */
8256 if (data_ref)
8257 new_stmt = gimple_build_assign (vec_dest, data_ref);
ebfd146a 8258 new_temp = make_ssa_name (vec_dest, new_stmt);
c3a8f964 8259 gimple_set_lhs (new_stmt, new_temp);
ebfd146a
IR
8260 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8261
272c6793
RS
8262 /* 3. Handle explicit realignment if necessary/supported.
8263 Create in loop:
8264 vec_dest = realign_load (msq, lsq, realignment_token) */
8265 if (alignment_support_scheme == dr_explicit_realign_optimized
8266 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 8267 {
272c6793
RS
8268 lsq = gimple_assign_lhs (new_stmt);
8269 if (!realignment_token)
8270 realignment_token = dataref_ptr;
8271 vec_dest = vect_create_destination_var (scalar_dest, vectype);
0d0e4a03
JJ
8272 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8273 msq, lsq, realignment_token);
272c6793
RS
8274 new_temp = make_ssa_name (vec_dest, new_stmt);
8275 gimple_assign_set_lhs (new_stmt, new_temp);
8276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8277
8278 if (alignment_support_scheme == dr_explicit_realign_optimized)
8279 {
8280 gcc_assert (phi);
8281 if (i == vec_num - 1 && j == ncopies - 1)
8282 add_phi_arg (phi, lsq,
8283 loop_latch_edge (containing_loop),
9e227d60 8284 UNKNOWN_LOCATION);
272c6793
RS
8285 msq = lsq;
8286 }
ebfd146a 8287 }
ebfd146a 8288
59fd17e3
RB
8289 /* 4. Handle invariant-load. */
8290 if (inv_p && !bb_vinfo)
8291 {
59fd17e3 8292 gcc_assert (!grouped_load);
d1417442
JJ
8293 /* If we have versioned for aliasing or the loop doesn't
8294 have any data dependencies that would preclude this,
8295 then we are sure this is a loop invariant load and
8296 thus we can insert it on the preheader edge. */
8297 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8298 && !nested_in_vect_loop
6b916b36 8299 && hoist_defs_of_uses (stmt, loop))
a0e35eb0
RB
8300 {
8301 if (dump_enabled_p ())
8302 {
8303 dump_printf_loc (MSG_NOTE, vect_location,
8304 "hoisting out of the vectorized "
8305 "loop: ");
8306 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 8307 }
b731b390 8308 tree tem = copy_ssa_name (scalar_dest);
a0e35eb0
RB
8309 gsi_insert_on_edge_immediate
8310 (loop_preheader_edge (loop),
8311 gimple_build_assign (tem,
8312 unshare_expr
8313 (gimple_assign_rhs1 (stmt))));
8314 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
34cd48e5
RB
8315 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8316 set_vinfo_for_stmt (new_stmt,
8317 new_stmt_vec_info (new_stmt, vinfo));
a0e35eb0
RB
8318 }
8319 else
8320 {
8321 gimple_stmt_iterator gsi2 = *gsi;
8322 gsi_next (&gsi2);
8323 new_temp = vect_init_vector (stmt, scalar_dest,
8324 vectype, &gsi2);
34cd48e5 8325 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 8326 }
59fd17e3
RB
8327 }
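	  /* Illustration of the hoisting above: for a load "x = a[0]"
	     that is invariant in the loop, the scalar copy
	     "tem = a[0]" is emitted on the preheader edge and the
	     splat { tem, tem, ... } is materialized there as well,
	     instead of in the loop body.  */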
8328
62da9e14 8329 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
272c6793 8330 {
aec7ae7d
JJ
8331 tree perm_mask = perm_mask_for_reverse (vectype);
8332 new_temp = permute_vec_elements (new_temp, new_temp,
8333 perm_mask, stmt, gsi);
ebfd146a
IR
8334 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8335 }
267d3070 8336
272c6793 8337 /* Collect vector loads and later create their permutation in
0d0293ac
MM
8338 vect_transform_grouped_load (). */
8339 if (grouped_load || slp_perm)
9771b263 8340 dr_chain.quick_push (new_temp);
267d3070 8341
272c6793
RS
8342 /* Store vector loads in the corresponding SLP_NODE. */
8343 if (slp && !slp_perm)
9771b263 8344 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b267968e
RB
8345
8346 /* With SLP permutation we load the gaps as well; without
8347 it we need to skip the gaps once we have fully loaded
8348 all elements. group_gap_adj is GROUP_SIZE here. */
8349 group_elt += nunits;
d9f21f6a
RS
8350 if (maybe_ne (group_gap_adj, 0U)
8351 && !slp_perm
8352 && known_eq (group_elt, group_size - group_gap_adj))
b267968e 8353 {
d9f21f6a
RS
8354 poly_wide_int bump_val
8355 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8356 * group_gap_adj);
8e6cdc90 8357 tree bump = wide_int_to_tree (sizetype, bump_val);
b267968e
RB
8358 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8359 stmt, bump);
8360 group_elt = 0;
8361 }
272c6793 8362 }
9b999e8c
RB
8363 /* Bump the vector pointer to account for a gap or for excess
8364 elements loaded for a permuted SLP load. */
d9f21f6a 8365 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
a64b9c26 8366 {
d9f21f6a
RS
8367 poly_wide_int bump_val
8368 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8369 * group_gap_adj);
8e6cdc90 8370 tree bump = wide_int_to_tree (sizetype, bump_val);
a64b9c26
RB
8371 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8372 stmt, bump);
8373 }
ebfd146a
IR
8374 }
8375
8376 if (slp && !slp_perm)
8377 continue;
8378
8379 if (slp_perm)
8380 {
29afecdf 8381 unsigned n_perms;
01d8bf07 8382 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
29afecdf
RB
8383 slp_node_instance, false,
8384 &n_perms))
ebfd146a 8385 {
9771b263 8386 dr_chain.release ();
ebfd146a
IR
8387 return false;
8388 }
8389 }
8390 else
8391 {
0d0293ac 8392 if (grouped_load)
ebfd146a 8393 {
2de001ee 8394 if (memory_access_type != VMAT_LOAD_STORE_LANES)
0d0293ac 8395 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 8396 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
ebfd146a
IR
8397 }
8398 else
8399 {
8400 if (j == 0)
8401 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8402 else
8403 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8404 prev_stmt_info = vinfo_for_stmt (new_stmt);
8405 }
8406 }
9771b263 8407 dr_chain.release ();
ebfd146a
IR
8408 }
8409
ebfd146a
IR
8410 return true;
8411 }
8412
8413 /* Function vect_is_simple_cond.
b8698a0f 8414
ebfd146a
IR
8415 Input:
8416 VINFO - the vect info of the code that is being vectorized.
8417 COND - Condition that is checked for simple use.
8418
e9e1d143
RG
8419 Output:
8420 *COMP_VECTYPE - the vector type for the comparison.
4fc5ebf1 8421 *DTS - The def types for the arguments of the comparison
e9e1d143 8422
ebfd146a
IR
8423 Returns whether a COND can be vectorized. Checks whether
8424 condition operands are supportable using vect_is_simple_use. */
8425
87aab9b2 8426 static bool
4fc5ebf1 8427 vect_is_simple_cond (tree cond, vec_info *vinfo,
8da4c8d8
RB
8428 tree *comp_vectype, enum vect_def_type *dts,
8429 tree vectype)
ebfd146a
IR
8430 {
8431 tree lhs, rhs;
e9e1d143 8432 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 8433
a414c77f
IE
8434 /* Mask case. */
8435 if (TREE_CODE (cond) == SSA_NAME
2568d8a1 8436 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
a414c77f
IE
8437 {
8438 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8439 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
4fc5ebf1 8440 &dts[0], comp_vectype)
a414c77f
IE
8441 || !*comp_vectype
8442 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8443 return false;
8444 return true;
8445 }
8446
ebfd146a
IR
8447 if (!COMPARISON_CLASS_P (cond))
8448 return false;
8449
8450 lhs = TREE_OPERAND (cond, 0);
8451 rhs = TREE_OPERAND (cond, 1);
8452
8453 if (TREE_CODE (lhs) == SSA_NAME)
8454 {
355fe088 8455 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4fc5ebf1 8456 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
ebfd146a
IR
8457 return false;
8458 }
4fc5ebf1
JG
8459 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8460 || TREE_CODE (lhs) == FIXED_CST)
8461 dts[0] = vect_constant_def;
8462 else
ebfd146a
IR
8463 return false;
8464
8465 if (TREE_CODE (rhs) == SSA_NAME)
8466 {
355fe088 8467 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4fc5ebf1 8468 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
ebfd146a
IR
8469 return false;
8470 }
4fc5ebf1
JG
8471 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8472 || TREE_CODE (rhs) == FIXED_CST)
8473 dts[1] = vect_constant_def;
8474 else
ebfd146a
IR
8475 return false;
8476
28b33016 8477 if (vectype1 && vectype2
928686b1
RS
8478 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8479 TYPE_VECTOR_SUBPARTS (vectype2)))
28b33016
IE
8480 return false;
8481
e9e1d143 8482 *comp_vectype = vectype1 ? vectype1 : vectype2;
8da4c8d8
RB
8483 /* Invariant comparison. */
8484 if (! *comp_vectype)
8485 {
8486 tree scalar_type = TREE_TYPE (lhs);
8487 /* If we can widen the comparison to match vectype do so. */
8488 if (INTEGRAL_TYPE_P (scalar_type)
8489 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8490 TYPE_SIZE (TREE_TYPE (vectype))))
8491 scalar_type = build_nonstandard_integer_type
8492 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8493 TYPE_UNSIGNED (scalar_type));
8494 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8495 }
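  /* Example (illustration only): if the condition compares two
     invariant chars while VECTYPE is a vector of 32-bit ints, the
     comparison is widened to a 32-bit integer type so that the element
     sizes of the comparison and of the VEC_COND_EXPR result match.  */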
8496
ebfd146a
IR
8497 return true;
8498 }
8499
8500 /* vectorizable_condition.
8501
b8698a0f
L
8502 Check if STMT is a conditional modify expression that can be vectorized.
8503 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8504 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4bbe8262
IR
8505 at GSI.
8506
8507 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8508 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
0ad23163 8509 else clause if it is 2).
ebfd146a
IR
8510
8511 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8512
4bbe8262 8513 bool
355fe088
TS
8514 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8515 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 8516 slp_tree slp_node)
ebfd146a
IR
8517 {
8518 tree scalar_dest = NULL_TREE;
8519 tree vec_dest = NULL_TREE;
01216d27
JJ
8520 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8521 tree then_clause, else_clause;
ebfd146a 8522 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 8523 tree comp_vectype = NULL_TREE;
ff802fa1
IR
8524 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8525 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 8526 tree vec_compare;
ebfd146a
IR
8527 tree new_temp;
8528 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4fc5ebf1
JG
8529 enum vect_def_type dts[4]
8530 = {vect_unknown_def_type, vect_unknown_def_type,
8531 vect_unknown_def_type, vect_unknown_def_type};
8532 int ndts = 4;
f7e531cf 8533 int ncopies;
01216d27 8534 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
a855b1b1 8535 stmt_vec_info prev_stmt_info = NULL;
f7e531cf
IR
8536 int i, j;
8537 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6e1aa848
DN
8538 vec<tree> vec_oprnds0 = vNULL;
8539 vec<tree> vec_oprnds1 = vNULL;
8540 vec<tree> vec_oprnds2 = vNULL;
8541 vec<tree> vec_oprnds3 = vNULL;
74946978 8542 tree vec_cmp_type;
a414c77f 8543 bool masked = false;
b8698a0f 8544
f7e531cf
IR
8545 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8546 return false;
8547
bb6c2b68
RS
8548 vect_reduction_type reduction_type
8549 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8550 if (reduction_type == TREE_CODE_REDUCTION)
af29617a
AH
8551 {
8552 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8553 return false;
ebfd146a 8554
af29617a
AH
8555 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8556 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8557 && reduc_def))
8558 return false;
ebfd146a 8559
af29617a
AH
8560 /* FORNOW: not yet supported. */
8561 if (STMT_VINFO_LIVE_P (stmt_info))
8562 {
8563 if (dump_enabled_p ())
8564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8565 "value used after loop.\n");
8566 return false;
8567 }
ebfd146a
IR
8568 }
8569
8570 /* Is vectorizable conditional operation? */
8571 if (!is_gimple_assign (stmt))
8572 return false;
8573
8574 code = gimple_assign_rhs_code (stmt);
8575
8576 if (code != COND_EXPR)
8577 return false;
8578
465c8c19 8579 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2947d3b2 8580 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 8581
fce57248 8582 if (slp_node)
465c8c19
JJ
8583 ncopies = 1;
8584 else
e8f142e2 8585 ncopies = vect_get_num_copies (loop_vinfo, vectype);
465c8c19
JJ
8586
8587 gcc_assert (ncopies >= 1);
8588 if (reduc_index && ncopies > 1)
8589 return false; /* FORNOW */
8590
4e71066d
RG
8591 cond_expr = gimple_assign_rhs1 (stmt);
8592 then_clause = gimple_assign_rhs2 (stmt);
8593 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 8594
4fc5ebf1 8595 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8da4c8d8 8596 &comp_vectype, &dts[0], vectype)
e9e1d143 8597 || !comp_vectype)
ebfd146a
IR
8598 return false;
8599
81c40241 8600 gimple *def_stmt;
4fc5ebf1 8601 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
2947d3b2
IE
8602 &vectype1))
8603 return false;
4fc5ebf1 8604 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
2947d3b2 8605 &vectype2))
ebfd146a 8606 return false;
2947d3b2
IE
8607
8608 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8609 return false;
8610
8611 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
ebfd146a
IR
8612 return false;
8613
28b33016
IE
8614 masked = !COMPARISON_CLASS_P (cond_expr);
8615 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8616
74946978
MP
8617 if (vec_cmp_type == NULL_TREE)
8618 return false;
784fb9b3 8619
01216d27
JJ
8620 cond_code = TREE_CODE (cond_expr);
8621 if (!masked)
8622 {
8623 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8624 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8625 }
8626
8627 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8628 {
8629 /* Boolean values may have another representation in vectors
8630 and therefore we prefer bit operations over comparison for
8631 them (which also works for scalar masks). We store opcodes
8632 to use in bitop1 and bitop2. Statement is vectorized as
8633 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8634 depending on bitop1 and bitop2 arity. */
8635 switch (cond_code)
8636 {
8637 case GT_EXPR:
8638 bitop1 = BIT_NOT_EXPR;
8639 bitop2 = BIT_AND_EXPR;
8640 break;
8641 case GE_EXPR:
8642 bitop1 = BIT_NOT_EXPR;
8643 bitop2 = BIT_IOR_EXPR;
8644 break;
8645 case LT_EXPR:
8646 bitop1 = BIT_NOT_EXPR;
8647 bitop2 = BIT_AND_EXPR;
8648 std::swap (cond_expr0, cond_expr1);
8649 break;
8650 case LE_EXPR:
8651 bitop1 = BIT_NOT_EXPR;
8652 bitop2 = BIT_IOR_EXPR;
8653 std::swap (cond_expr0, cond_expr1);
8654 break;
8655 case NE_EXPR:
8656 bitop1 = BIT_XOR_EXPR;
8657 break;
8658 case EQ_EXPR:
8659 bitop1 = BIT_XOR_EXPR;
8660 bitop2 = BIT_NOT_EXPR;
8661 break;
8662 default:
8663 return false;
8664 }
8665 cond_code = SSA_NAME;
8666 }
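  /* For 0/1 (or all-zeros/all-ones) mask values the rewrites above are
     the usual boolean identities, shown here for illustration:

       x >  y  ->  x & ~y	x >= y  ->  x | ~y
       x <  y  ->  y & ~x	x <= y  ->  y | ~x
       x != y  ->  x ^ y	x == y  ->  ~(x ^ y)

     with LT/LE handled by swapping the operands first and the trailing
     NOT of the EQ case folded away later by swapping the then/else
     clauses of the COND_EXPR instead.  */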
8667
b8698a0f 8668 if (!vec_stmt)
ebfd146a
IR
8669 {
8670 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
01216d27
JJ
8671 if (bitop1 != NOP_EXPR)
8672 {
8673 machine_mode mode = TYPE_MODE (comp_vectype);
8674 optab optab;
8675
8676 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8677 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8678 return false;
8679
8680 if (bitop2 != NOP_EXPR)
8681 {
8682 optab = optab_for_tree_code (bitop2, comp_vectype,
8683 optab_default);
8684 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8685 return false;
8686 }
8687 }
4fc5ebf1
JG
8688 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8689 cond_code))
8690 {
78604de0
RB
8691 if (!slp_node)
8692 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
4fc5ebf1
JG
8693 return true;
8694 }
8695 return false;
ebfd146a
IR
8696 }
8697
f7e531cf
IR
8698 /* Transform. */
8699
8700 if (!slp_node)
8701 {
9771b263
DN
8702 vec_oprnds0.create (1);
8703 vec_oprnds1.create (1);
8704 vec_oprnds2.create (1);
8705 vec_oprnds3.create (1);
f7e531cf 8706 }
ebfd146a
IR
8707
8708 /* Handle def. */
8709 scalar_dest = gimple_assign_lhs (stmt);
bb6c2b68
RS
8710 if (reduction_type != EXTRACT_LAST_REDUCTION)
8711 vec_dest = vect_create_destination_var (scalar_dest, vectype);
ebfd146a
IR
8712
8713 /* Handle cond expr. */
a855b1b1
MM
8714 for (j = 0; j < ncopies; j++)
8715 {
bb6c2b68 8716 gimple *new_stmt = NULL;
a855b1b1
MM
8717 if (j == 0)
8718 {
f7e531cf
IR
8719 if (slp_node)
8720 {
00f96dc9
TS
8721 auto_vec<tree, 4> ops;
8722 auto_vec<vec<tree>, 4> vec_defs;
9771b263 8723
a414c77f 8724 if (masked)
01216d27 8725 ops.safe_push (cond_expr);
a414c77f
IE
8726 else
8727 {
01216d27
JJ
8728 ops.safe_push (cond_expr0);
8729 ops.safe_push (cond_expr1);
a414c77f 8730 }
9771b263
DN
8731 ops.safe_push (then_clause);
8732 ops.safe_push (else_clause);
306b0c92 8733 vect_get_slp_defs (ops, slp_node, &vec_defs);
37b5ec8f
JJ
8734 vec_oprnds3 = vec_defs.pop ();
8735 vec_oprnds2 = vec_defs.pop ();
a414c77f
IE
8736 if (!masked)
8737 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 8738 vec_oprnds0 = vec_defs.pop ();
f7e531cf
IR
8739 }
8740 else
8741 {
355fe088 8742 gimple *gtemp;
a414c77f
IE
8743 if (masked)
8744 {
8745 vec_cond_lhs
8746 = vect_get_vec_def_for_operand (cond_expr, stmt,
8747 comp_vectype);
8748 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8749 &gtemp, &dts[0]);
8750 }
8751 else
8752 {
01216d27
JJ
8753 vec_cond_lhs
8754 = vect_get_vec_def_for_operand (cond_expr0,
8755 stmt, comp_vectype);
8756 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8757
8758 vec_cond_rhs
8759 = vect_get_vec_def_for_operand (cond_expr1,
8760 stmt, comp_vectype);
8761 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
a414c77f 8762 }
f7e531cf
IR
8763 if (reduc_index == 1)
8764 vec_then_clause = reduc_def;
8765 else
8766 {
8767 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
81c40241
RB
8768 stmt);
8769 vect_is_simple_use (then_clause, loop_vinfo,
8770 &gtemp, &dts[2]);
f7e531cf
IR
8771 }
8772 if (reduc_index == 2)
8773 vec_else_clause = reduc_def;
8774 else
8775 {
8776 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
81c40241
RB
8777 stmt);
8778 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 8779 }
a855b1b1
MM
8780 }
8781 }
8782 else
8783 {
a414c77f
IE
8784 vec_cond_lhs
8785 = vect_get_vec_def_for_stmt_copy (dts[0],
8786 vec_oprnds0.pop ());
8787 if (!masked)
8788 vec_cond_rhs
8789 = vect_get_vec_def_for_stmt_copy (dts[1],
8790 vec_oprnds1.pop ());
8791
a855b1b1 8792 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 8793 vec_oprnds2.pop ());
a855b1b1 8794 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 8795 vec_oprnds3.pop ());
f7e531cf
IR
8796 }
8797
8798 if (!slp_node)
8799 {
9771b263 8800 vec_oprnds0.quick_push (vec_cond_lhs);
a414c77f
IE
8801 if (!masked)
8802 vec_oprnds1.quick_push (vec_cond_rhs);
9771b263
DN
8803 vec_oprnds2.quick_push (vec_then_clause);
8804 vec_oprnds3.quick_push (vec_else_clause);
a855b1b1
MM
8805 }
8806
9dc3f7de 8807 /* Arguments are ready. Create the new vector stmt. */
9771b263 8808 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 8809 {
9771b263
DN
8810 vec_then_clause = vec_oprnds2[i];
8811 vec_else_clause = vec_oprnds3[i];
a855b1b1 8812
a414c77f
IE
8813 if (masked)
8814 vec_compare = vec_cond_lhs;
8815 else
8816 {
8817 vec_cond_rhs = vec_oprnds1[i];
01216d27
JJ
8818 if (bitop1 == NOP_EXPR)
8819 vec_compare = build2 (cond_code, vec_cmp_type,
8820 vec_cond_lhs, vec_cond_rhs);
8821 else
8822 {
8823 new_temp = make_ssa_name (vec_cmp_type);
8824 if (bitop1 == BIT_NOT_EXPR)
8825 new_stmt = gimple_build_assign (new_temp, bitop1,
8826 vec_cond_rhs);
8827 else
8828 new_stmt
8829 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8830 vec_cond_rhs);
8831 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8832 if (bitop2 == NOP_EXPR)
8833 vec_compare = new_temp;
8834 else if (bitop2 == BIT_NOT_EXPR)
8835 {
8836 /* Instead of doing ~x ? y : z do x ? z : y. */
8837 vec_compare = new_temp;
8838 std::swap (vec_then_clause, vec_else_clause);
8839 }
8840 else
8841 {
8842 vec_compare = make_ssa_name (vec_cmp_type);
8843 new_stmt
8844 = gimple_build_assign (vec_compare, bitop2,
8845 vec_cond_lhs, new_temp);
8846 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8847 }
8848 }
a414c77f 8849 }
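	  /* For EXTRACT_LAST_REDUCTION the COND_EXPR is emitted below
	     as IFN_FOLD_EXTRACT_LAST (ELSE, MASK, VEC), which yields
	     the element of VEC that corresponds to the last set bit of
	     MASK, or the scalar ELSE value if no bit is set (summary
	     for illustration).  */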
bb6c2b68
RS
8850 if (reduction_type == EXTRACT_LAST_REDUCTION)
8851 {
8852 if (!is_gimple_val (vec_compare))
8853 {
8854 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8855 new_stmt = gimple_build_assign (vec_compare_name,
8856 vec_compare);
8857 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8858 vec_compare = vec_compare_name;
8859 }
8860 gcc_assert (reduc_index == 2);
8861 new_stmt = gimple_build_call_internal
8862 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8863 vec_then_clause);
8864 gimple_call_set_lhs (new_stmt, scalar_dest);
8865 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8866 if (stmt == gsi_stmt (*gsi))
8867 vect_finish_replace_stmt (stmt, new_stmt);
8868 else
8869 {
8870 /* In this case we're moving the definition to later in the
8871 block. That doesn't matter because the only uses of the
8872 lhs are in phi statements. */
8873 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8874 gsi_remove (&old_gsi, true);
8875 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8876 }
8877 }
8878 else
8879 {
8880 new_temp = make_ssa_name (vec_dest);
8881 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8882 vec_compare, vec_then_clause,
8883 vec_else_clause);
8884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8885 }
f7e531cf 8886 if (slp_node)
9771b263 8887 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f7e531cf
IR
8888 }
8889
8890 if (slp_node)
8891 continue;
8892
8893 if (j == 0)
8894 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8895 else
8896 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8897
8898 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 8899 }
b8698a0f 8900
9771b263
DN
8901 vec_oprnds0.release ();
8902 vec_oprnds1.release ();
8903 vec_oprnds2.release ();
8904 vec_oprnds3.release ();
f7e531cf 8905
ebfd146a
IR
8906 return true;
8907 }
8908
42fd8198
IE
8909 /* vectorizable_comparison.
8910
8911 Check if STMT is a comparison expression that can be vectorized.
8912 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8913 comparison, put it in VEC_STMT, and insert it at GSI.
8914
8915 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8916
fce57248 8917 static bool
42fd8198
IE
8918 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8919 gimple **vec_stmt, tree reduc_def,
8920 slp_tree slp_node)
8921 {
8922 tree lhs, rhs1, rhs2;
8923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8924 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8925 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8926 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8927 tree new_temp;
8928 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8929 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
4fc5ebf1 8930 int ndts = 2;
928686b1 8931 poly_uint64 nunits;
42fd8198 8932 int ncopies;
49e76ff1 8933 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
42fd8198
IE
8934 stmt_vec_info prev_stmt_info = NULL;
8935 int i, j;
8936 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8937 vec<tree> vec_oprnds0 = vNULL;
8938 vec<tree> vec_oprnds1 = vNULL;
8939 gimple *def_stmt;
8940 tree mask_type;
8941 tree mask;
8942
c245362b
IE
8943 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8944 return false;
8945
30480bcd 8946 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
42fd8198
IE
8947 return false;
8948
8949 mask_type = vectype;
8950 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8951
fce57248 8952 if (slp_node)
42fd8198
IE
8953 ncopies = 1;
8954 else
e8f142e2 8955 ncopies = vect_get_num_copies (loop_vinfo, vectype);
42fd8198
IE
8956
8957 gcc_assert (ncopies >= 1);
42fd8198
IE
8958 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8959 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8960 && reduc_def))
8961 return false;
8962
8963 if (STMT_VINFO_LIVE_P (stmt_info))
8964 {
8965 if (dump_enabled_p ())
8966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8967 "value used after loop.\n");
8968 return false;
8969 }
8970
8971 if (!is_gimple_assign (stmt))
8972 return false;
8973
8974 code = gimple_assign_rhs_code (stmt);
8975
8976 if (TREE_CODE_CLASS (code) != tcc_comparison)
8977 return false;
8978
8979 rhs1 = gimple_assign_rhs1 (stmt);
8980 rhs2 = gimple_assign_rhs2 (stmt);
8981
8982 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8983 &dts[0], &vectype1))
8984 return false;
8985
8986 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8987 &dts[1], &vectype2))
8988 return false;
8989
8990 if (vectype1 && vectype2
928686b1
RS
8991 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8992 TYPE_VECTOR_SUBPARTS (vectype2)))
42fd8198
IE
8993 return false;
8994
8995 vectype = vectype1 ? vectype1 : vectype2;
8996
8997 /* Invariant comparison. */
8998 if (!vectype)
8999 {
69a9a66f 9000 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
928686b1 9001 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
42fd8198
IE
9002 return false;
9003 }
928686b1 9004 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
42fd8198
IE
9005 return false;
9006
49e76ff1
IE
9007 /* Can't compare mask and non-mask types. */
9008 if (vectype1 && vectype2
9009 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9010 return false;
9011
9012 /* Boolean values may have another representation in vectors
9013 and therefore we prefer bit operations over comparison for
9014 them (which also works for scalar masks). We store opcodes
9015 to use in bitop1 and bitop2. Statement is vectorized as
9016 BITOP2 (rhs1 BITOP1 rhs2) or
9017 rhs1 BITOP2 (BITOP1 rhs2)
9018 depending on bitop1 and bitop2 arity. */
9019 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9020 {
9021 if (code == GT_EXPR)
9022 {
9023 bitop1 = BIT_NOT_EXPR;
9024 bitop2 = BIT_AND_EXPR;
9025 }
9026 else if (code == GE_EXPR)
9027 {
9028 bitop1 = BIT_NOT_EXPR;
9029 bitop2 = BIT_IOR_EXPR;
9030 }
9031 else if (code == LT_EXPR)
9032 {
9033 bitop1 = BIT_NOT_EXPR;
9034 bitop2 = BIT_AND_EXPR;
9035 std::swap (rhs1, rhs2);
264d951a 9036 std::swap (dts[0], dts[1]);
49e76ff1
IE
9037 }
9038 else if (code == LE_EXPR)
9039 {
9040 bitop1 = BIT_NOT_EXPR;
9041 bitop2 = BIT_IOR_EXPR;
9042 std::swap (rhs1, rhs2);
264d951a 9043 std::swap (dts[0], dts[1]);
49e76ff1
IE
9044 }
9045 else
9046 {
9047 bitop1 = BIT_XOR_EXPR;
9048 if (code == EQ_EXPR)
9049 bitop2 = BIT_NOT_EXPR;
9050 }
9051 }
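  /* These are the same boolean identities used by vectorizable_condition
     above (x > y -> x & ~y, x >= y -> x | ~y, and so on); the only
     difference is that the operand swap for LT/LE must also swap the
     recorded def types.  */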
9052
42fd8198
IE
9053 if (!vec_stmt)
9054 {
9055 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
78604de0
RB
9056 if (!slp_node)
9057 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9058 dts, ndts, NULL, NULL);
49e76ff1 9059 if (bitop1 == NOP_EXPR)
96592eed 9060 return expand_vec_cmp_expr_p (vectype, mask_type, code);
49e76ff1
IE
9061 else
9062 {
9063 machine_mode mode = TYPE_MODE (vectype);
9064 optab optab;
9065
9066 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9067 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9068 return false;
9069
9070 if (bitop2 != NOP_EXPR)
9071 {
9072 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9073 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9074 return false;
9075 }
9076 return true;
9077 }
42fd8198
IE
9078 }
9079
9080 /* Transform. */
9081 if (!slp_node)
9082 {
9083 vec_oprnds0.create (1);
9084 vec_oprnds1.create (1);
9085 }
9086
9087 /* Handle def. */
9088 lhs = gimple_assign_lhs (stmt);
9089 mask = vect_create_destination_var (lhs, mask_type);
9090
9091 /* Handle cmp expr. */
9092 for (j = 0; j < ncopies; j++)
9093 {
9094 gassign *new_stmt = NULL;
9095 if (j == 0)
9096 {
9097 if (slp_node)
9098 {
9099 auto_vec<tree, 2> ops;
9100 auto_vec<vec<tree>, 2> vec_defs;
9101
9102 ops.safe_push (rhs1);
9103 ops.safe_push (rhs2);
306b0c92 9104 vect_get_slp_defs (ops, slp_node, &vec_defs);
42fd8198
IE
9105 vec_oprnds1 = vec_defs.pop ();
9106 vec_oprnds0 = vec_defs.pop ();
9107 }
9108 else
9109 {
e4af0bc4
IE
9110 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9111 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
42fd8198
IE
9112 }
9113 }
9114 else
9115 {
9116 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9117 vec_oprnds0.pop ());
9118 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9119 vec_oprnds1.pop ());
9120 }
9121
9122 if (!slp_node)
9123 {
9124 vec_oprnds0.quick_push (vec_rhs1);
9125 vec_oprnds1.quick_push (vec_rhs2);
9126 }
9127
9128 /* Arguments are ready. Create the new vector stmt. */
9129 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9130 {
9131 vec_rhs2 = vec_oprnds1[i];
9132
9133 new_temp = make_ssa_name (mask);
49e76ff1
IE
9134 if (bitop1 == NOP_EXPR)
9135 {
9136 new_stmt = gimple_build_assign (new_temp, code,
9137 vec_rhs1, vec_rhs2);
9138 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9139 }
9140 else
9141 {
9142 if (bitop1 == BIT_NOT_EXPR)
9143 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9144 else
9145 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9146 vec_rhs2);
9147 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9148 if (bitop2 != NOP_EXPR)
9149 {
9150 tree res = make_ssa_name (mask);
9151 if (bitop2 == BIT_NOT_EXPR)
9152 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9153 else
9154 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9155 new_temp);
9156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9157 }
9158 }
42fd8198
IE
9159 if (slp_node)
9160 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9161 }
9162
9163 if (slp_node)
9164 continue;
9165
9166 if (j == 0)
9167 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9168 else
9169 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9170
9171 prev_stmt_info = vinfo_for_stmt (new_stmt);
9172 }
9173
9174 vec_oprnds0.release ();
9175 vec_oprnds1.release ();
9176
9177 return true;
9178 }
ebfd146a 9179
68a0f2ff
RS
9180 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9181 can handle all live statements in the node. Otherwise return true
9182 if STMT is not live or if vectorizable_live_operation can handle it.
9183 GSI and VEC_STMT are as for vectorizable_live_operation. */
9184
9185 static bool
9186 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9187 slp_tree slp_node, gimple **vec_stmt)
9188 {
9189 if (slp_node)
9190 {
9191 gimple *slp_stmt;
9192 unsigned int i;
9193 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9194 {
9195 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9196 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9197 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9198 vec_stmt))
9199 return false;
9200 }
9201 }
9202 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9203 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9204 return false;
9205
9206 return true;
9207 }
9208
8644a673 9209/* Make sure the statement is vectorizable. */
ebfd146a
IR
9210
9211 bool
891ad31c
RB
9212 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9213 slp_instance node_instance)
ebfd146a 9214{
8644a673 9215 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 9216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 9217 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 9218 bool ok;
355fe088 9219 gimple *pattern_stmt;
363477c0 9220 gimple_seq pattern_def_seq;
ebfd146a 9221
73fbfcad 9222 if (dump_enabled_p ())
ebfd146a 9223 {
78c60e3d
SS
9224 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9225 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 9226 }
ebfd146a 9227
1825a1f3 9228 if (gimple_has_volatile_ops (stmt))
b8698a0f 9229 {
73fbfcad 9230 if (dump_enabled_p ())
78c60e3d 9231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9232 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
9233
9234 return false;
9235 }
b8698a0f
L
9236
9237 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
9238 to include:
9239 - the COND_EXPR which is the loop exit condition
9240 - any LABEL_EXPRs in the loop
b8698a0f 9241 - computations that are used only for array indexing or loop control.
8644a673 9242 In basic blocks we only analyze statements that are a part of some SLP
83197f37 9243 instance, therefore, all the statements are relevant.
ebfd146a 9244
d092494c 9245 Pattern statement needs to be analyzed instead of the original statement
83197f37 9246 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
9247 statements. In basic blocks we are called from some SLP instance
9248 traversal; there we don't analyze pattern stmts, as the pattern
9249 stmts will already be part of an SLP instance. */
83197f37
IR
9250
9251 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 9252 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 9253 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 9254 {
9d5e7640 9255 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 9256 && pattern_stmt
9d5e7640
IR
9257 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9258 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9259 {
83197f37 9260 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
9261 stmt = pattern_stmt;
9262 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 9263 if (dump_enabled_p ())
9d5e7640 9264 {
78c60e3d
SS
9265 dump_printf_loc (MSG_NOTE, vect_location,
9266 "==> examining pattern statement: ");
9267 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
9268 }
9269 }
9270 else
9271 {
73fbfcad 9272 if (dump_enabled_p ())
e645e942 9273 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 9274
9d5e7640
IR
9275 return true;
9276 }
8644a673 9277 }
83197f37 9278 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 9279 && node == NULL
83197f37
IR
9280 && pattern_stmt
9281 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9282 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9283 {
9284 /* Analyze PATTERN_STMT too. */
73fbfcad 9285 if (dump_enabled_p ())
83197f37 9286 {
78c60e3d
SS
9287 dump_printf_loc (MSG_NOTE, vect_location,
9288 "==> examining pattern statement: ");
9289 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
9290 }
9291
891ad31c
RB
9292 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9293 node_instance))
83197f37
IR
9294 return false;
9295 }
ebfd146a 9296
1107f3ae 9297 if (is_pattern_stmt_p (stmt_info)
079c527f 9298 && node == NULL
363477c0 9299 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 9300 {
363477c0 9301 gimple_stmt_iterator si;
1107f3ae 9302
363477c0
JJ
9303 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9304 {
355fe088 9305 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
9306 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9307 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9308 {
9309 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 9310 if (dump_enabled_p ())
363477c0 9311 {
78c60e3d
SS
9312 dump_printf_loc (MSG_NOTE, vect_location,
9313 "==> examining pattern def statement: ");
9314 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 9315 }
1107f3ae 9316
363477c0 9317 if (!vect_analyze_stmt (pattern_def_stmt,
891ad31c 9318 need_to_vectorize, node, node_instance))
363477c0
JJ
9319 return false;
9320 }
9321 }
9322 }
1107f3ae 9323
8644a673
IR
9324 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9325 {
9326 case vect_internal_def:
9327 break;
ebfd146a 9328
8644a673 9329 case vect_reduction_def:
7c5222ff 9330 case vect_nested_cycle:
14a61437
RB
9331 gcc_assert (!bb_vinfo
9332 && (relevance == vect_used_in_outer
9333 || relevance == vect_used_in_outer_by_reduction
9334 || relevance == vect_used_by_reduction
b28ead45
AH
9335 || relevance == vect_unused_in_scope
9336 || relevance == vect_used_only_live));
8644a673
IR
9337 break;
9338
9339 case vect_induction_def:
e7baeb39
RB
9340 gcc_assert (!bb_vinfo);
9341 break;
9342
8644a673
IR
9343 case vect_constant_def:
9344 case vect_external_def:
9345 case vect_unknown_def_type:
9346 default:
9347 gcc_unreachable ();
9348 }
ebfd146a 9349
8644a673 9350 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 9351 {
8644a673 9352 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
9353 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9354 || (is_gimple_call (stmt)
9355 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 9356 *need_to_vectorize = true;
ebfd146a
IR
9357 }
9358
b1af7da6
RB
9359 if (PURE_SLP_STMT (stmt_info) && !node)
9360 {
9361 dump_printf_loc (MSG_NOTE, vect_location,
9362 "handled only by SLP analysis\n");
9363 return true;
9364 }
9365
9366 ok = true;
9367 if (!bb_vinfo
9368 && (STMT_VINFO_RELEVANT_P (stmt_info)
9369 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9370 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9371 || vectorizable_conversion (stmt, NULL, NULL, node)
9372 || vectorizable_shift (stmt, NULL, NULL, node)
9373 || vectorizable_operation (stmt, NULL, NULL, node)
9374 || vectorizable_assignment (stmt, NULL, NULL, node)
9375 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9376 || vectorizable_call (stmt, NULL, NULL, node)
9377 || vectorizable_store (stmt, NULL, NULL, node)
891ad31c 9378 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
e7baeb39 9379 || vectorizable_induction (stmt, NULL, NULL, node)
42fd8198
IE
9380 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9381 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
9382 else
9383 {
9384 if (bb_vinfo)
9385 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9386 || vectorizable_conversion (stmt, NULL, NULL, node)
9387 || vectorizable_shift (stmt, NULL, NULL, node)
9388 || vectorizable_operation (stmt, NULL, NULL, node)
9389 || vectorizable_assignment (stmt, NULL, NULL, node)
9390 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9391 || vectorizable_call (stmt, NULL, NULL, node)
9392 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
9393 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9394 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 9395 }
8644a673
IR
9396
9397 if (!ok)
ebfd146a 9398 {
73fbfcad 9399 if (dump_enabled_p ())
8644a673 9400 {
78c60e3d
SS
9401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9402 "not vectorized: relevant stmt not ");
9403 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9404 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9405 }
b8698a0f 9406
ebfd146a
IR
9407 return false;
9408 }
9409
a70d6342
IR
9410 if (bb_vinfo)
9411 return true;
9412
8644a673
IR
9413 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
9414 need extra handling, except for vectorizable reductions. */
68a0f2ff
RS
9415 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9416 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
ebfd146a 9417 {
73fbfcad 9418 if (dump_enabled_p ())
8644a673 9419 {
78c60e3d 9420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
68a0f2ff 9421 "not vectorized: live stmt not supported: ");
78c60e3d 9422 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 9423 }
b8698a0f 9424
8644a673 9425 return false;
ebfd146a
IR
9426 }
9427
ebfd146a
IR
9428 return true;
9429 }
9430
9431
9432 /* Function vect_transform_stmt.
9433
9434 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9435
9436 bool
355fe088 9437 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 9438 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
9439 slp_instance slp_node_instance)
9440 {
9441 bool is_store = false;
355fe088 9442 gimple *vec_stmt = NULL;
ebfd146a 9443 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 9444 bool done;
ebfd146a 9445
fce57248 9446 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 9447 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 9448
e57d9a82
RB
9449 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9450 && nested_in_vect_loop_p
9451 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9452 stmt));
9453
ebfd146a
IR
9454 switch (STMT_VINFO_TYPE (stmt_info))
9455 {
9456 case type_demotion_vec_info_type:
ebfd146a 9457 case type_promotion_vec_info_type:
ebfd146a
IR
9458 case type_conversion_vec_info_type:
9459 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9460 gcc_assert (done);
9461 break;
9462
9463 case induc_vec_info_type:
e7baeb39 9464 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
9465 gcc_assert (done);
9466 break;
9467
9dc3f7de
IR
9468 case shift_vec_info_type:
9469 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9470 gcc_assert (done);
9471 break;
9472
ebfd146a
IR
9473 case op_vec_info_type:
9474 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9475 gcc_assert (done);
9476 break;
9477
9478 case assignment_vec_info_type:
9479 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9480 gcc_assert (done);
9481 break;
9482
9483 case load_vec_info_type:
b8698a0f 9484 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
9485 slp_node_instance);
9486 gcc_assert (done);
9487 break;
9488
9489 case store_vec_info_type:
9490 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9491 gcc_assert (done);
0d0293ac 9492 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
9493 {
9494 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 9495 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
9496 one are skipped, and there vec_stmt_info shouldn't be freed
9497 meanwhile. */
0d0293ac 9498 *grouped_store = true;
f307441a
RS
9499 stmt_vec_info group_info
9500 = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9501 if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
ebfd146a 9502 is_store = true;
f307441a 9503 }
ebfd146a
IR
9504 else
9505 is_store = true;
9506 break;
9507
9508 case condition_vec_info_type:
f7e531cf 9509 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
9510 gcc_assert (done);
9511 break;
9512
42fd8198
IE
9513 case comparison_vec_info_type:
9514 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9515 gcc_assert (done);
9516 break;
9517
ebfd146a 9518 case call_vec_info_type:
190c2236 9519 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
039d9ea1 9520 stmt = gsi_stmt (*gsi);
ebfd146a
IR
9521 break;
9522
0136f8f0
AH
9523 case call_simd_clone_vec_info_type:
9524 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9525 stmt = gsi_stmt (*gsi);
9526 break;
9527
ebfd146a 9528 case reduc_vec_info_type:
891ad31c
RB
9529 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9530 slp_node_instance);
ebfd146a
IR
9531 gcc_assert (done);
9532 break;
9533
9534 default:
9535 if (!STMT_VINFO_LIVE_P (stmt_info))
9536 {
73fbfcad 9537 if (dump_enabled_p ())
78c60e3d 9538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 9539 "stmt not supported.\n");
ebfd146a
IR
9540 gcc_unreachable ();
9541 }
9542 }
9543
225ce44b
RB
9544 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9545 This would break hybrid SLP vectorization. */
9546 if (slp_node)
d90f8440
RB
9547 gcc_assert (!vec_stmt
9548 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 9549
ebfd146a
IR
9550 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9551 is being vectorized, but outside the immediately enclosing loop. */
9552 if (vec_stmt
e57d9a82 9553 && nested_p
ebfd146a
IR
9554 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9555 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 9556 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 9557 vect_used_in_outer_by_reduction))
ebfd146a 9558 {
a70d6342
IR
9559 struct loop *innerloop = LOOP_VINFO_LOOP (
9560 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
9561 imm_use_iterator imm_iter;
9562 use_operand_p use_p;
9563 tree scalar_dest;
355fe088 9564 gimple *exit_phi;
ebfd146a 9565
73fbfcad 9566 if (dump_enabled_p ())
78c60e3d 9567 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 9568 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
9569
9570 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9571 (to be used when vectorizing outer-loop stmts that use the DEF of
9572 STMT). */
9573 if (gimple_code (stmt) == GIMPLE_PHI)
9574 scalar_dest = PHI_RESULT (stmt);
9575 else
9576 scalar_dest = gimple_assign_lhs (stmt);
9577
9578 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9579 {
9580 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9581 {
9582 exit_phi = USE_STMT (use_p);
9583 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9584 }
9585 }
9586 }
9587
9588 /* Handle stmts whose DEF is used outside the loop-nest that is
9589 being vectorized. */
68a0f2ff 9590 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
ebfd146a 9591 {
68a0f2ff 9592 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
ebfd146a
IR
9593 gcc_assert (done);
9594 }
9595
9596 if (vec_stmt)
83197f37 9597 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 9598
b8698a0f 9599 return is_store;
ebfd146a
IR
9600}
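
/* A usage sketch (illustrative only, not from the GCC sources): the loop
   and SLP drivers in tree-vect-loop.c and tree-vect-slp.c call this pair
   of phases roughly as follows; the locals here are hypothetical, and
   only stmts that passed analysis reach the transform phase.

     bool grouped_store = false;
     bool is_store = vect_transform_stmt (stmt, &gsi, &grouped_store,
					  NULL, NULL);
     if (is_store)
       {
	 // The scalar store (or whole store group) is now dead; see
	 // vect_remove_stores below.
       }
*/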


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
	next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create the vector that maps gimple UIDs to stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector that maps gimple UIDs to stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
	{
	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
	  gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
	  gimple_set_bb (patt_stmt, NULL);
	  tree lhs = gimple_get_lhs (patt_stmt);
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    release_ssa_name (lhs);
	  if (seq)
	    {
	      gimple_stmt_iterator si;
	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
		{
		  gimple *seq_stmt = gsi_stmt (si);
		  gimple_set_bb (seq_stmt, NULL);
		  lhs = gimple_get_lhs (seq_stmt);
		  if (lhs && TREE_CODE (lhs) == SSA_NAME)
		    release_ssa_name (lhs);
		  free_stmt_vec_info (seq_stmt);
		}
	    }
	  free_stmt_vec_info (patt_stmt);
	}
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

tree
get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
{
  tree orig_scalar_type = scalar_type;
  scalar_mode inner_mode;
  machine_mode simd_mode;
  poly_uint64 nunits;
  tree vectype;

  if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
      && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
    return NULL_TREE;

  unsigned int nbytes = GET_MODE_SIZE (inner_mode);

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test, simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
	   && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
						  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     look up a vector mode of the specified size.  */
  if (known_eq (size, 0U))
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else if (!multiple_p (size, nbytes, &nunits)
	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
    return NULL_TREE;
  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  /* Re-attach the address-space qualifier if we canonicalized the scalar
     type.  */
  if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
    return build_qualified_type
	     (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));

  return vectype;
}
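
/* An illustrative example (not from the GCC sources): on a target whose
   16-byte vector mode for SImode elements is V4SImode, a call such as

     tree v = get_vectype_for_scalar_type_and_size (intSI_type_node, 16);

   would be expected to return a four-element vector type, while passing
   a SIZE of zero defers the choice of vector width to
   targetm.vectorize.preferred_simd_mode.  */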

poly_uint64 current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
						  current_vector_size);
  if (vectype
      && known_eq (current_vector_size, 0U))
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
				  current_vector_size);
}
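
/* An illustrative note (an editorial assumption, not from the GCC
   sources): on targets with dedicated mask registers the truth vector
   can be much narrower than the data vector.  A sketch:

     tree mask_type = get_mask_type_for_scalar_type (intSI_type_node);
     // mask_type has one boolean element per lane of the corresponding
     // data vector; its mode may be a dedicated mask mode rather than
     // a full-width vector mode.
*/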

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
		    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
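
/* A usage sketch (illustrative only): callers typically classify each
   operand before deciding how to vectorize the stmt that uses it; OP
   and VINFO below are hypothetical locals.

     gimple *def_stmt;
     enum vect_def_type dt;
     tree op_vectype;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &op_vectype))
       return false;	// operand defined in an unsupported way
     if (dt == vect_constant_def || dt == vect_external_def)
       {
	 // op_vectype is NULL_TREE here; broadcast the invariant and
	 // pick a vector type suited to the use stmt.
       }
*/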


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	{
	  /* Elements in a vector with vect_used_by_reduction property cannot
	     be reordered if the use chain with this property does not have
	     the same operation.  One such example is s += a * b, where
	     elements in a and b cannot be reordered.  Here we check if the
	     vector defined by STMT is only directly used in the reduction
	     statement.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  use_operand_p dummy;
	  gimple *use_stmt;
	  stmt_vec_info use_stmt_info = NULL;
	  if (single_imm_use (lhs, &dummy, &use_stmt)
	      && (use_stmt_info = vinfo_for_stmt (use_stmt))
	      && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
	    return true;
	}
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
			 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_halve_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			     TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
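
/* A worked example (illustrative only, the vector type names are
   hypothetical): for a char->int conversion where the target only
   provides char->short and short->int unpacks, a query sketched as

     enum tree_code c1, c2;
     int steps;
     auto_vec<tree> interm;
     if (supportable_widening_operation (NOP_EXPR, stmt, v4si_type,
					 v16qi_type, &c1, &c2, &steps,
					 &interm))
       ...

   would be expected to succeed with *MULTI_STEP_CVT == 1 and the short
   vector type pushed onto INTERM_TYPES.  */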


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add an additional check for the number of elements.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
	    || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
			 TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	{
	  intermediate_type = vect_double_mask_nunits (prev_type);
	  if (intermediate_mode != TYPE_MODE (intermediate_type))
	    return false;
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return (!VECTOR_BOOLEAN_TYPE_P (vectype)
		|| known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			     TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
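
/* The mirror image of the widening example above (illustrative only):
   for int->char on a target that only packs between adjacent widths,

     enum tree_code c1;
     int steps;
     auto_vec<tree> interm;
     if (supportable_narrowing_operation (NOP_EXPR, v16qi_type, v4si_type,
					  &c1, &steps, &interm))
       ...

   would be expected to report one intermediate (short) step; the vector
   type names are hypothetical.  */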

/* Generate and return a statement that sets vector mask MASK such that
   MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */

gcall *
vect_gen_while (tree mask, tree start_index, tree end_index)
{
  tree cmp_type = TREE_TYPE (start_index);
  tree mask_type = TREE_TYPE (mask);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  gimple_call_set_lhs (call, mask);
  return call;
}
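
/* A worked example of the semantics above: with a four-element MASK,
   START_INDEX 6 and END_INDEX 9, the generated IFN_WHILE_ULT call sets
   MASK to { true, true, true, false }, since 6, 7 and 8 are all below 9
   but 9 is not.  This is the primitive used to disable excess lanes in
   the final iteration of a fully-masked loop.  */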

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = make_ssa_name (mask_type);
  gcall *call = vect_gen_while (tmp, start_index, end_index);
  gimple_seq_add_stmt (seq, call);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
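
/* A usage sketch (illustrative only): vect_gen_while_not simply inverts
   the WHILE_ULT mask.

     gimple_seq seq = NULL;
     tree inv_mask = vect_gen_while_not (&seq, mask_type, start, end);
     // SEQ now holds the IFN_WHILE_ULT call and the BIT_NOT_EXPR;
     // inv_mask is true exactly for the lanes vect_gen_while leaves
     // false.
*/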