/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}
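
/* As an illustration (mirroring the calls in vect_get_store_cost and
   vect_get_load_cost below, where DR is the data_reference being
   costed), queueing the cost of one unaligned vector store looks like

     record_stmt_cost (body_cost_vec, 1, unaligned_store, stmt_info,
		       DR_MISALIGNMENT (dr), vect_body);

   whereas passing a NULL BODY_COST_VEC hands the cost straight to the
   target via add_stmt_cost instead of queueing it.  */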

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
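
/* E.g., given the scalar store a[i] = x, the use of 'x' is a non-indexing
   operand (it feeds the stored value), while 'i' only participates in the
   address computation; so only the def of 'x' needs to be vectorized.  */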

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
     STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		     vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		     vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, relevant, &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
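
/* For instance, with PWR == 1 the loop below accumulates
   vect_pow2 (1) + vect_pow2 (2) = 6 vec_promote_demote stmts for a
   two-step promotion, and vect_pow2 (0) + vect_pow2 (1) = 3 for the
   corresponding two-step demotion.  */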
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */
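
/* E.g., for an interleaved group of three stores {S1, S2, S3} whose first
   element is S1, calling this on S1 yields 3 while calling it on S2 or S3
   yields 1, so the group overhead is charged exactly once.  */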

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple *first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
940 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
941 stmt_info, 0, vect_body);
ebfd146a 942
73fbfcad 943 if (dump_enabled_p ())
78c60e3d 944 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 945 "vect_model_store_cost: strided group_size = %d .\n",
78c60e3d 946 group_size);
ebfd146a
IR
947 }
948
cee62fee 949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 950 /* Costs of the stores. */
cee62fee
MM
951 if (STMT_VINFO_STRIDED_P (stmt_info)
952 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
f2e2a985
MM
953 {
954 /* N scalar stores plus extracting the elements. */
f2e2a985
MM
955 inside_cost += record_stmt_cost (body_cost_vec,
956 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
957 scalar_store, stmt_info, 0, vect_body);
f2e2a985
MM
958 }
959 else
960 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
ebfd146a 961
cee62fee
MM
962 if (STMT_VINFO_STRIDED_P (stmt_info))
963 inside_cost += record_stmt_cost (body_cost_vec,
964 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
965 vec_to_scalar, stmt_info, 0, vect_body);
966
73fbfcad 967 if (dump_enabled_p ())
78c60e3d
SS
968 dump_printf_loc (MSG_NOTE, vect_location,
969 "vect_model_store_cost: inside_cost = %d, "
e645e942 970 "prologue_cost = %d .\n", inside_cost, prologue_cost);
ebfd146a
IR
971}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
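
/* For example (assuming a V4SI vector TYPE and the scalar constant 5 as
   VAL), the preheader gains an init stmt along the lines of

     cst_1 = { 5, 5, 5, 5 };

   and the new SSA name cst_1 is returned.  */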

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial value 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
1558
1559
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}

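/* Illustrative usage sketch (added for clarity; not part of the original
   sources): a typical non-SLP caller creates the defs for copy 0 and then
   advances them once per additional copy, e.g.

     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        NULL, -1);
     for (j = 1; j < ncopies; j++)
       vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

   so that vec_oprnds0[0] (and vec_oprnds1[0], if present) always hold the
   defs for the copy currently being generated.  */
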
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}

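/* Illustrative note (not in the original sources): for a call that maps to
   CFN_SQRT with V2DF as both VECTYPE_OUT and VECTYPE_IN, the function above
   would return IFN_SQRT whenever direct_internal_fn_supported_p reports
   that the target implements the corresponding optab for that mode.  */
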
355fe088 1693static tree permute_vec_elements (tree, tree, tree, gimple *,
5ce9450f
JJ
1694 gimple_stmt_iterator *);
1695
1696
/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree rhs_vectype = NULL_TREE;
  tree mask_vectype;
  tree elem_type;
  gimple *new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple *ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple *def_stmt;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);

  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
    return false;

  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
      || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
        return false;
    }

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
                                          TYPE_MODE (mask_vectype),
                                          !is_store)
           || (rhs_vectype
               && !useless_type_conversion_p (vectype, rhs_vectype)))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /** Transform. **/

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
          for (i = 0; i < nunits; ++i)
            sel[i] = i | gather_off_nunits;
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
        }
      else
        gcc_unreachable ();

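      /* Worked example (for illustration; not in the original sources):
         for WIDEN with nunits == 4 and gather_off_nunits == 8, the loop
         above builds sel = { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the permute
         routes the upper half of the offset vector into the low lanes for
         the odd-numbered copy.  For NARROW with nunits == 8 and
         gather_off_nunits == 4 it builds sel = { 0, 1, 2, 3, 8, 9, 10, 11 },
         i.e. lanes 0-3 of the first permute input followed by lanes 0-3 of
         the second, which merges two partial gather results.  */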
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                {
                  vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }

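          /* Note (for illustration; not in the original sources): the five
             arguments below follow the gather builtin's signature as parsed
             from ARGLIST above - (src, base, index, mask, scale) - and since
             SRCTYPE and MASKTYPE were asserted compatible, MASK_OP serves as
             both the merge source and the mask.  */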
          new_stmt
            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                                 scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              /* We should have caught mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
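          /* Note (for illustration; not in the original sources):
             MISALIGN & -MISALIGN isolates the lowest set bit of the
             misalignment, i.e. the largest power of two known to divide it
             (e.g. 12 & -12 == 4), so the constant built below carries the
             alignment the target may rely on.  */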
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                          ptr, vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
                                          ptr, vec_mask);
          gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}

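/* Illustrative note (not in the original sources): a single-step narrowing
   here is e.g. V4SI -> V8HI, for which supportable_narrowing_operation
   typically returns VEC_PACK_TRUNC_EXPR; vectorizable_call below then uses
   *CONVERT_CODE to combine two half-width call results into one vector.  */
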
/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
                                         slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Having no arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

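  /* Illustrative note (not in the original sources): NARROW means the output
     vector has twice as many (narrower) elements as the input, e.g. a call
     taking V2DF and producing V4SI, so two input vectors' worth of results
     are packed into each output vector.  */
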
  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  combined_fn cfn = gimple_call_combined_fn (stmt);
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
          || (modifier == NARROW
              && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                          vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
        fndecl = targetm.vectorize.builtin_vectorized_function
          (cfn, vectype_out, vectype_in);
      else
        fndecl = targetm.vectorize.builtin_md_vectorized_function
          (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                         "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
        add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
                       vec_promote_demote, stmt_info, 0, vect_body);

      return true;
    }

  /** Transform. **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  if (modifier == NARROW)
                    {
                      tree half_res = make_ssa_name (vectype_in);
                      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      gimple_call_set_lhs (new_stmt, half_res);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      if ((i & 1) == 0)
                        {
                          prev_res = half_res;
                          continue;
                        }
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, convert_code,
                                                      prev_res, half_res);
                    }
                  else
                    {
                      if (ifn != IFN_LAST)
                        new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      else
                        new_stmt = gimple_build_call_vec (fndecl, vargs);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
              gimple *init_stmt = gimple_build_assign (new_var, cst);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, new_var);
            }
          else if (modifier == NARROW)
            {
              tree half_res = make_ssa_name (vectype_in);
              new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (new_stmt, half_res);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if ((j & 1) == 0)
                {
                  prev_res = half_res;
                  continue;
                }
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, convert_code,
                                              prev_res, half_res);
            }
          else
            {
              if (ifn != IFN_LAST)
                new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              else
                new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else if (modifier == NARROW)
    {
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  if (ifn != IFN_LAST)
                    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                  else
                    new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);

  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (is_gimple_call (def_stmt)
               && gimple_call_internal_p (def_stmt)
               && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
    }
}

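/* Illustrative note (not in the original sources): for a def chain such as

     _1 = GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 8;
     op_3 = &array p+ _2;

   the walk above records arginfo->op = &array and arginfo->linear_step = 8
   and sets simd_lane_linear, i.e. OP advances by 8 bytes per simd lane.  */
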
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node)
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
                               &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
          thisarginfo.simd_lane_linear
            = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
               == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
          if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
         linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
          && !thisarginfo.linear_step
          && !vec_stmt
          && thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && loop_vinfo
          && !slp_node
          && TREE_CODE (op) == SSA_NAME)
        vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
                         TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }

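  /* Illustrative note (not in the original sources): the scoring above
     prefers the widest usable clone.  E.g. with a vectorization factor of 8,
     a simdlen-4 clone is penalized (exact_log2 (8) - exact_log2 (4)) * 1024
     == 1024 and needs ncopies == 2 calls per vector iteration, while a
     matching simdlen-8 clone scores 0 and is chosen.  */
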
2995 if (bestn == NULL)
00426f9a 2996 return false;
0136f8f0
AH
2997
2998 for (i = 0; i < nargs; i++)
2999 if ((arginfo[i].dt == vect_constant_def
3000 || arginfo[i].dt == vect_external_def)
3001 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3002 {
3003 arginfo[i].vectype
3004 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3005 i)));
3006 if (arginfo[i].vectype == NULL
3007 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3008 > bestn->simdclone->simdlen))
00426f9a 3009 return false;
0136f8f0
AH
3010 }
3011
3012 fndecl = bestn->decl;
3013 nunits = bestn->simdclone->simdlen;
3014 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3015
3016 /* If the function isn't const, only allow it in simd loops where user
3017 has asserted that at least nunits consecutive iterations can be
3018 performed using SIMD instructions. */
3019 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3020 && gimple_vuse (stmt))
00426f9a 3021 return false;
0136f8f0
AH
3022
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies >= 1);
3026
3027 if (!vec_stmt) /* transformation not required. */
3028 {
6c9e85fb
JJ
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3030 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3031 if ((bestn->simdclone->args[i].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3033 || (bestn->simdclone->args[i].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3035 {
17b658af 3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3037 + 1);
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3039 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3040 ? size_type_node : TREE_TYPE (arginfo[i].op);
3041 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3043 tree sll = arginfo[i].simd_lane_linear
3044 ? boolean_true_node : boolean_false_node;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3046 }
0136f8f0
AH
3047 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE, vect_location,
3050 "=== vectorizable_simd_clone_call ===\n");
3051/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3052 return true;
3053 }
3054
3055 /** Transform. **/
3056
3057 if (dump_enabled_p ())
3058 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3059
3060 /* Handle def. */
3061 scalar_dest = gimple_call_lhs (stmt);
3062 vec_dest = NULL_TREE;
3063 rtype = NULL_TREE;
3064 ratype = NULL_TREE;
3065 if (scalar_dest)
3066 {
3067 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3068 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3069 if (TREE_CODE (rtype) == ARRAY_TYPE)
3070 {
3071 ratype = rtype;
3072 rtype = TREE_TYPE (ratype);
3073 }
3074 }
3075
3076 prev_stmt_info = NULL;
3077 for (j = 0; j < ncopies; ++j)
3078 {
3079 /* Build argument list for the vectorized call. */
3080 if (j == 0)
3081 vargs.create (nargs);
3082 else
3083 vargs.truncate (0);
3084
3085 for (i = 0; i < nargs; i++)
3086 {
3087 unsigned int k, l, m, o;
3088 tree atype;
3089 op = gimple_call_arg (stmt, i);
3090 switch (bestn->simdclone->args[i].arg_type)
3091 {
3092 case SIMD_CLONE_ARG_TYPE_VECTOR:
3093 atype = bestn->simdclone->args[i].vector_type;
3094 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3095 for (m = j * o; m < (j + 1) * o; m++)
3096 {
3097 if (TYPE_VECTOR_SUBPARTS (atype)
3098 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3099 {
3100 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3101 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3102 / TYPE_VECTOR_SUBPARTS (atype));
3103 gcc_assert ((k & (k - 1)) == 0);
3104 if (m == 0)
3105 vec_oprnd0
81c40241 3106 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3107 else
3108 {
3109 vec_oprnd0 = arginfo[i].op;
3110 if ((m & (k - 1)) == 0)
3111 vec_oprnd0
3112 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3113 vec_oprnd0);
3114 }
3115 arginfo[i].op = vec_oprnd0;
3116 vec_oprnd0
3117 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3118 size_int (prec),
3119 bitsize_int ((m & (k - 1)) * prec));
3120 new_stmt
b731b390 3121 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3122 vec_oprnd0);
3123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3124 vargs.safe_push (gimple_assign_lhs (new_stmt));
3125 }
3126 else
3127 {
3128 k = (TYPE_VECTOR_SUBPARTS (atype)
3129 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3130 gcc_assert ((k & (k - 1)) == 0);
3131 vec<constructor_elt, va_gc> *ctor_elts;
3132 if (k != 1)
3133 vec_alloc (ctor_elts, k);
3134 else
3135 ctor_elts = NULL;
3136 for (l = 0; l < k; l++)
3137 {
3138 if (m == 0 && l == 0)
3139 vec_oprnd0
81c40241 3140 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3141 else
3142 vec_oprnd0
3143 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3144 arginfo[i].op);
3145 arginfo[i].op = vec_oprnd0;
3146 if (k == 1)
3147 break;
3148 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3149 vec_oprnd0);
3150 }
3151 if (k == 1)
3152 vargs.safe_push (vec_oprnd0);
3153 else
3154 {
3155 vec_oprnd0 = build_constructor (atype, ctor_elts);
3156 new_stmt
b731b390 3157 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3158 vec_oprnd0);
3159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 vargs.safe_push (gimple_assign_lhs (new_stmt));
3161 }
3162 }
3163 }
3164 break;
3165 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3166 vargs.safe_push (op);
3167 break;
3168 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3169 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3170 if (j == 0)
3171 {
3172 gimple_seq stmts;
3173 arginfo[i].op
3174 = force_gimple_operand (arginfo[i].op, &stmts, true,
3175 NULL_TREE);
3176 if (stmts != NULL)
3177 {
3178 basic_block new_bb;
3179 edge pe = loop_preheader_edge (loop);
3180 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3181 gcc_assert (!new_bb);
3182 }
17b658af
JJ
3183 if (arginfo[i].simd_lane_linear)
3184 {
3185 vargs.safe_push (arginfo[i].op);
3186 break;
3187 }
b731b390 3188 tree phi_res = copy_ssa_name (op);
538dd0b7 3189 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3190 set_vinfo_for_stmt (new_phi,
310213d4 3191 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3192 add_phi_arg (new_phi, arginfo[i].op,
3193 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3194 enum tree_code code
3195 = POINTER_TYPE_P (TREE_TYPE (op))
3196 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3197 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3198 ? sizetype : TREE_TYPE (op);
3199 widest_int cst
3200 = wi::mul (bestn->simdclone->args[i].linear_step,
3201 ncopies * nunits);
3202 tree tcst = wide_int_to_tree (type, cst);
b731b390 3203 tree phi_arg = copy_ssa_name (op);
3204 new_stmt
3205 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3206 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3207 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3208 set_vinfo_for_stmt (new_stmt,
310213d4 3209 new_stmt_vec_info (new_stmt, loop_vinfo));
3210 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3211 UNKNOWN_LOCATION);
3212 arginfo[i].op = phi_res;
3213 vargs.safe_push (phi_res);
3214 }
3215 else
3216 {
3217 enum tree_code code
3218 = POINTER_TYPE_P (TREE_TYPE (op))
3219 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3220 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3221 ? sizetype : TREE_TYPE (op);
3222 widest_int cst
3223 = wi::mul (bestn->simdclone->args[i].linear_step,
3224 j * nunits);
3225 tree tcst = wide_int_to_tree (type, cst);
b731b390 3226 new_temp = make_ssa_name (TREE_TYPE (op));
3227 new_stmt = gimple_build_assign (new_temp, code,
3228 arginfo[i].op, tcst);
3229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3230 vargs.safe_push (new_temp);
3231 }
3232 break;
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
0136f8f0 3235 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3239 default:
3240 gcc_unreachable ();
3241 }
3242 }
3243
3244 new_stmt = gimple_build_call_vec (fndecl, vargs);
3245 if (vec_dest)
3246 {
3247 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3248 if (ratype)
b731b390 3249 new_temp = create_tmp_var (ratype);
3250 else if (TYPE_VECTOR_SUBPARTS (vectype)
3251 == TYPE_VECTOR_SUBPARTS (rtype))
3252 new_temp = make_ssa_name (vec_dest, new_stmt);
3253 else
3254 new_temp = make_ssa_name (rtype, new_stmt);
3255 gimple_call_set_lhs (new_stmt, new_temp);
3256 }
3257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3258
3259 if (vec_dest)
3260 {
3261 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3262 {
3263 unsigned int k, l;
3264 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3265 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3266 gcc_assert ((k & (k - 1)) == 0);
3267 for (l = 0; l < k; l++)
3268 {
3269 tree t;
3270 if (ratype)
3271 {
3272 t = build_fold_addr_expr (new_temp);
3273 t = build2 (MEM_REF, vectype, t,
3274 build_int_cst (TREE_TYPE (t),
3275 l * prec / BITS_PER_UNIT));
3276 }
3277 else
3278 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3279 size_int (prec), bitsize_int (l * prec));
3280 new_stmt
b731b390 3281 = gimple_build_assign (make_ssa_name (vectype), t);
3282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3283 if (j == 0 && l == 0)
3284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3285 else
3286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3287
3288 prev_stmt_info = vinfo_for_stmt (new_stmt);
3289 }
3290
3291 if (ratype)
3292 {
3293 tree clobber = build_constructor (ratype, NULL);
3294 TREE_THIS_VOLATILE (clobber) = 1;
3295 new_stmt = gimple_build_assign (new_temp, clobber);
3296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3297 }
3298 continue;
3299 }
3300 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3301 {
3302 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3303 / TYPE_VECTOR_SUBPARTS (rtype));
3304 gcc_assert ((k & (k - 1)) == 0);
3305 if ((j & (k - 1)) == 0)
3306 vec_alloc (ret_ctor_elts, k);
3307 if (ratype)
3308 {
3309 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3310 for (m = 0; m < o; m++)
3311 {
3312 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3313 size_int (m), NULL_TREE, NULL_TREE);
3314 new_stmt
b731b390 3315 = gimple_build_assign (make_ssa_name (rtype), tem);
3316 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3318 gimple_assign_lhs (new_stmt));
3319 }
3320 tree clobber = build_constructor (ratype, NULL);
3321 TREE_THIS_VOLATILE (clobber) = 1;
3322 new_stmt = gimple_build_assign (new_temp, clobber);
3323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3324 }
3325 else
3326 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3327 if ((j & (k - 1)) != k - 1)
3328 continue;
3329 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3330 new_stmt
b731b390 3331 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3332 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3333
3334 if ((unsigned) j == k - 1)
3335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3336 else
3337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3338
3339 prev_stmt_info = vinfo_for_stmt (new_stmt);
3340 continue;
3341 }
3342 else if (ratype)
3343 {
3344 tree t = build_fold_addr_expr (new_temp);
3345 t = build2 (MEM_REF, vectype, t,
3346 build_int_cst (TREE_TYPE (t), 0));
3347 new_stmt
b731b390 3348 = gimple_build_assign (make_ssa_name (vec_dest), t);
3349 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3350 tree clobber = build_constructor (ratype, NULL);
3351 TREE_THIS_VOLATILE (clobber) = 1;
3352 vect_finish_stmt_generation (stmt,
3353 gimple_build_assign (new_temp,
3354 clobber), gsi);
3355 }
3356 }
3357
3358 if (j == 0)
3359 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3360 else
3361 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3362
3363 prev_stmt_info = vinfo_for_stmt (new_stmt);
3364 }
3365
3366 vargs.release ();
3367
3368 /* The call in STMT might prevent it from being removed in dce.
3369 We however cannot remove it here, due to the way the ssa name
3370 it defines is mapped to the new definition. So just replace
3371 the rhs of the statement with something harmless. */
3372
3373 if (slp_node)
3374 return true;
3375
3376 if (scalar_dest)
3377 {
3378 type = TREE_TYPE (scalar_dest);
3379 if (is_pattern_stmt_p (stmt_info))
3380 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3381 else
3382 lhs = gimple_call_lhs (stmt);
3383 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3384 }
3385 else
3386 new_stmt = gimple_build_nop ();
3387 set_vinfo_for_stmt (new_stmt, stmt_info);
3388 set_vinfo_for_stmt (stmt, NULL);
3389 STMT_VINFO_STMT (stmt_info) = new_stmt;
2865f32a 3390 gsi_replace (gsi, new_stmt, true);
3391 unlink_stmt_vdef (stmt);
3392
3393 return true;
3394}
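
/* An illustrative sketch of the transform above (hypothetical clone and
   types, not taken from a testcase): given

     #pragma omp declare simd notinbranch
     int foo (int x);

     for (i = 0; i < n; i++)
       a[i] = foo (b[i]);

   each group of vector iterations becomes, for a clone whose simdlen
   matches the vectorization factor, roughly

     vect_b = <vector load from b>;
     vect_r = foo.simdclone (vect_b);    <- hypothetical mangled name
     <vector store of vect_r to a>;

   with the CONSTRUCTOR/BIT_FIELD_REF code above reconciling any mismatch
   between the clone's vector types and the loop's vectypes, and the PHI
   code above materializing linear-step arguments in the loop header.  */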
3395
3396
3397/* Function vect_gen_widened_results_half
3398
3399 Create a vector stmt whose code, number of arguments, and result
b8698a0f 3400 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
ff802fa1 3401 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3402 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3403 needs to be created (DECL is a function-decl of a target-builtin).
3404 STMT is the original scalar stmt that we are vectorizing. */
3405
355fe088 3406static gimple *
3407vect_gen_widened_results_half (enum tree_code code,
3408 tree decl,
3409 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3410 tree vec_dest, gimple_stmt_iterator *gsi,
355fe088 3411 gimple *stmt)
b8698a0f 3412{
355fe088 3413 gimple *new_stmt;
3414 tree new_temp;
3415
3416 /* Generate half of the widened result: */
3417 if (code == CALL_EXPR)
3418 {
3419 /* Target specific support */
3420 if (op_type == binary_op)
3421 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3422 else
3423 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3424 new_temp = make_ssa_name (vec_dest, new_stmt);
3425 gimple_call_set_lhs (new_stmt, new_temp);
3426 }
3427 else
ebfd146a 3428 {
3429 /* Generic support */
3430 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3431 if (op_type != binary_op)
3432 vec_oprnd1 = NULL;
0d0e4a03 3433 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3434 new_temp = make_ssa_name (vec_dest, new_stmt);
3435 gimple_assign_set_lhs (new_stmt, new_temp);
b8698a0f 3436 }
3437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3438
3439 return new_stmt;
3440}
3441
3442
3443/* Get vectorized definitions for loop-based vectorization. For the first
3444 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3445 scalar operand), and for the rest we get a copy with
3446 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3447 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3448 The vectors are collected into VEC_OPRNDS. */
3449
3450static void
355fe088 3451vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
9771b263 3452 vec<tree> *vec_oprnds, int multi_step_cvt)
3453{
3454 tree vec_oprnd;
3455
3456 /* Get the first vector operand. All the vector operands except the
3457 very first one (that is the scalar oprnd) are stmt copies. */
3459 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
81c40241 3460 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3461 else
3462 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3463
9771b263 3464 vec_oprnds->quick_push (vec_oprnd);
3465
3466 /* Get second vector operand. */
3467 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
9771b263 3468 vec_oprnds->quick_push (vec_oprnd);
3469
3470 *oprnd = vec_oprnd;
3471
3472 /* For conversion in multiple steps, continue to get operands
3473 recursively. */
3474 if (multi_step_cvt)
3475 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3476}
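
/* A small worked example of the recursion above (a sketch): called with
   MULTI_STEP_CVT == 1 it collects four defs in VEC_OPRNDS,

     vx0 = vect_get_vec_def_for_operand (x);    first invocation
     vx1 = <stmt copy of vx0>;
     vx2 = <stmt copy of vx1>;                  recursive invocation
     vx3 = <stmt copy of vx2>;

   which a two-step demotion then combines pairwise.  */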
3477
3478
3479/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3480 For multi-step conversions store the resulting vectors and call the function
3481 recursively. */
3482
3483static void
9771b263 3484vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
355fe088 3485 int multi_step_cvt, gimple *stmt,
9771b263 3486 vec<tree> vec_dsts,
4a00c761
JJ
3487 gimple_stmt_iterator *gsi,
3488 slp_tree slp_node, enum tree_code code,
3489 stmt_vec_info *prev_stmt_info)
3490{
3491 unsigned int i;
3492 tree vop0, vop1, new_tmp, vec_dest;
355fe088 3493 gimple *new_stmt;
3494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3495
9771b263 3496 vec_dest = vec_dsts.pop ();
4a00c761 3497
9771b263 3498 for (i = 0; i < vec_oprnds->length (); i += 2)
3499 {
3500 /* Create demotion operation. */
3501 vop0 = (*vec_oprnds)[i];
3502 vop1 = (*vec_oprnds)[i + 1];
0d0e4a03 3503 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3504 new_tmp = make_ssa_name (vec_dest, new_stmt);
3505 gimple_assign_set_lhs (new_stmt, new_tmp);
3506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3507
3508 if (multi_step_cvt)
3509 /* Store the resulting vector for next recursive call. */
9771b263 3510 (*vec_oprnds)[i/2] = new_tmp;
3511 else
3512 {
3513 /* This is the last step of the conversion sequence. Store the
3514 vectors in SLP_NODE or in vector info of the scalar statement
3515 (or in STMT_VINFO_RELATED_STMT chain). */
3516 if (slp_node)
9771b263 3517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 3518 else
3519 {
3520 if (!*prev_stmt_info)
3521 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3522 else
3523 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4a00c761 3524
3525 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3526 }
3527 }
3528 }
3529
3530 /* For multi-step demotion operations we first generate demotion operations
3531 from the source type to the intermediate types, and then combine the
3532 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3533 type. */
3534 if (multi_step_cvt)
3535 {
3536 /* At each level of recursion we have half of the operands we had at the
3537 previous level. */
9771b263 3538 vec_oprnds->truncate ((i+1)/2);
3539 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3540 stmt, vec_dsts, gsi, slp_node,
3541 VEC_PACK_TRUNC_EXPR,
3542 prev_stmt_info);
3543 }
3544
9771b263 3545 vec_dsts.quick_push (vec_dest);
3546}
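
/* A worked example of the demotion recursion (a sketch with hypothetical
   128-bit vector types): demoting four V4SI defs v0..v3 to one V16QI
   result takes two levels.  The first level packs pairs,

     w0 = VEC_PACK_TRUNC_EXPR <v0, v1>;    (V8HI)
     w1 = VEC_PACK_TRUNC_EXPR <v2, v3>;    (V8HI)

   stores w0 and w1 back into VEC_OPRNDS, and the recursive call emits

     r = VEC_PACK_TRUNC_EXPR <w0, w1>;     (V16QI)  */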
3547
3548
3549/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3550 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3551 the resulting vectors and call the function recursively. */
3552
3553static void
3554vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3555 vec<tree> *vec_oprnds1,
355fe088 3556 gimple *stmt, tree vec_dest,
3557 gimple_stmt_iterator *gsi,
3558 enum tree_code code1,
3559 enum tree_code code2, tree decl1,
3560 tree decl2, int op_type)
3561{
3562 int i;
3563 tree vop0, vop1, new_tmp1, new_tmp2;
355fe088 3564 gimple *new_stmt1, *new_stmt2;
6e1aa848 3565 vec<tree> vec_tmp = vNULL;
4a00c761 3566
3567 vec_tmp.create (vec_oprnds0->length () * 2);
3568 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3569 {
3570 if (op_type == binary_op)
9771b263 3571 vop1 = (*vec_oprnds1)[i];
3572 else
3573 vop1 = NULL_TREE;
3574
3575 /* Generate the two halves of promotion operation. */
3576 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3577 op_type, vec_dest, gsi, stmt);
3578 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3579 op_type, vec_dest, gsi, stmt);
3580 if (is_gimple_call (new_stmt1))
3581 {
3582 new_tmp1 = gimple_call_lhs (new_stmt1);
3583 new_tmp2 = gimple_call_lhs (new_stmt2);
3584 }
3585 else
3586 {
3587 new_tmp1 = gimple_assign_lhs (new_stmt1);
3588 new_tmp2 = gimple_assign_lhs (new_stmt2);
3589 }
3590
3591 /* Store the results for the next step. */
3592 vec_tmp.quick_push (new_tmp1);
3593 vec_tmp.quick_push (new_tmp2);
3594 }
3595
689eaba3 3596 vec_oprnds0->release ();
3597 *vec_oprnds0 = vec_tmp;
3598}
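
/* Sketch of the effect of one promotion level (the codes are typically
   the VEC_UNPACK_*_EXPRs returned by supportable_widening_operation):
   from a single V8HI def vx, VEC_OPRNDS0 is left holding the two widened
   halves

     vx_lo = VEC_UNPACK_LO_EXPR <vx>;    (V4SI)
     vx_hi = VEC_UNPACK_HI_EXPR <vx>;    (V4SI)

   so each level doubles the number of vectors, matching the doubling of
   the element width.  */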
3599
3600
3601/* Check if STMT performs a conversion operation that can be vectorized.
3602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4a00c761 3603 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3605
3606static bool
3607vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3608 gimple **vec_stmt, slp_tree slp_node)
3609{
3610 tree vec_dest;
3611 tree scalar_dest;
4a00c761 3612 tree op0, op1 = NULL_TREE;
3613 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3615 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3616 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4a00c761 3617 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3618 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3619 tree new_temp;
355fe088 3620 gimple *def_stmt;
ebfd146a 3621 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
355fe088 3622 gimple *new_stmt = NULL;
3623 stmt_vec_info prev_stmt_info;
3624 int nunits_in;
3625 int nunits_out;
3626 tree vectype_out, vectype_in;
3627 int ncopies, i, j;
3628 tree lhs_type, rhs_type;
ebfd146a 3629 enum { NARROW, NONE, WIDEN } modifier;
3630 vec<tree> vec_oprnds0 = vNULL;
3631 vec<tree> vec_oprnds1 = vNULL;
ebfd146a 3632 tree vop0;
4a00c761 3633 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 3634 vec_info *vinfo = stmt_info->vinfo;
4a00c761 3635 int multi_step_cvt = 0;
3636 vec<tree> vec_dsts = vNULL;
3637 vec<tree> interm_types = vNULL;
3638 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3639 int op_type;
ef4bddc2 3640 machine_mode rhs_mode;
4a00c761 3641 unsigned short fltsz;
3642
3643 /* Is STMT a vectorizable conversion? */
3644
4a00c761 3645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3646 return false;
3647
3648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3649 && ! vec_stmt)
3650 return false;
3651
3652 if (!is_gimple_assign (stmt))
3653 return false;
3654
3655 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3656 return false;
3657
3658 code = gimple_assign_rhs_code (stmt);
3659 if (!CONVERT_EXPR_CODE_P (code)
3660 && code != FIX_TRUNC_EXPR
3661 && code != FLOAT_EXPR
3662 && code != WIDEN_MULT_EXPR
3663 && code != WIDEN_LSHIFT_EXPR)
3664 return false;
3665
3666 op_type = TREE_CODE_LENGTH (code);
3667
ebfd146a 3668 /* Check types of lhs and rhs. */
b690cc0f 3669 scalar_dest = gimple_assign_lhs (stmt);
4a00c761 3670 lhs_type = TREE_TYPE (scalar_dest);
3671 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3672
3673 op0 = gimple_assign_rhs1 (stmt);
3674 rhs_type = TREE_TYPE (op0);
3675
3676 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3677 && !((INTEGRAL_TYPE_P (lhs_type)
3678 && INTEGRAL_TYPE_P (rhs_type))
3679 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3680 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3681 return false;
3682
3683 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3684 && ((INTEGRAL_TYPE_P (lhs_type)
3685 && (TYPE_PRECISION (lhs_type)
3686 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3687 || (INTEGRAL_TYPE_P (rhs_type)
3688 && (TYPE_PRECISION (rhs_type)
3689 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4a00c761 3690 {
73fbfcad 3691 if (dump_enabled_p ())
78c60e3d 3692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3693 "type conversion to/from bit-precision unsupported."
3694 "\n");
3695 return false;
3696 }
3697
b690cc0f 3698 /* Check the operands of the operation. */
81c40241 3699 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
b690cc0f 3700 {
73fbfcad 3701 if (dump_enabled_p ())
78c60e3d 3702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3703 "use not simple.\n");
3704 return false;
3705 }
3706 if (op_type == binary_op)
3707 {
3708 bool ok;
3709
3710 op1 = gimple_assign_rhs2 (stmt);
3711 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3712 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3713 OP1. */
3714 if (CONSTANT_CLASS_P (op0))
81c40241 3715 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4a00c761 3716 else
81c40241 3717 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3718
3719 if (!ok)
3720 {
73fbfcad 3721 if (dump_enabled_p ())
78c60e3d 3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3723 "use not simple.\n");
3724 return false;
3725 }
3726 }
3727
3728 /* If op0 is an external or constant def, use a vector type of
3729 the same size as the output vector type. */
ebfd146a 3730 if (!vectype_in)
b690cc0f 3731 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3732 if (vec_stmt)
3733 gcc_assert (vectype_in);
3734 if (!vectype_in)
3735 {
73fbfcad 3736 if (dump_enabled_p ())
4a00c761 3737 {
3738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3739 "no vectype for scalar type ");
3740 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
e645e942 3741 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4a00c761 3742 }
3743
3744 return false;
3745 }
ebfd146a 3746
3747 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3748 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3749 {
3750 if (dump_enabled_p ())
3751 {
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3753 "can't convert between boolean and non "
3754 "boolean vectors");
3755 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3756 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3757 }
3758
3759 return false;
3760 }
3761
3762 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3763 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4a00c761 3764 if (nunits_in < nunits_out)
3765 modifier = NARROW;
3766 else if (nunits_out == nunits_in)
3767 modifier = NONE;
ebfd146a 3768 else
4a00c761 3769 modifier = WIDEN;
ebfd146a 3770
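/* E.g., assuming 128-bit vectors: int -> float is NONE (V4SI -> V4SF),
   int -> double is WIDEN (one V4SI feeds two V2DFs), and long -> int is
   NARROW (two V2DIs feed one V4SI).  The actual classification depends
   on the vector types the target provides.  */
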
3771 /* Multiple types in SLP are handled by creating the appropriate number of
3772 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3773 case of SLP. */
fce57248 3774 if (slp_node)
ebfd146a 3775 ncopies = 1;
3776 else if (modifier == NARROW)
3777 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3778 else
3779 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
b8698a0f 3780
3781 /* Sanity check: make sure that at least one copy of the vectorized stmt
3782 needs to be generated. */
3783 gcc_assert (ncopies >= 1);
3784
ebfd146a 3785 /* Supportable by target? */
4a00c761 3786 switch (modifier)
ebfd146a 3787 {
3788 case NONE:
3789 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3790 return false;
3791 if (supportable_convert_operation (code, vectype_out, vectype_in,
3792 &decl1, &code1))
3793 break;
3794 /* FALLTHRU */
3795 unsupported:
73fbfcad 3796 if (dump_enabled_p ())
78c60e3d 3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3798 "conversion not supported by target.\n");
ebfd146a 3799 return false;
ebfd146a 3800
3801 case WIDEN:
3802 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3803 &code1, &code2, &multi_step_cvt,
3804 &interm_types))
3805 {
3806 /* Binary widening operation can only be supported directly by the
3807 architecture. */
3808 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3809 break;
3810 }
3811
3812 if (code != FLOAT_EXPR
3813 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3814 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3815 goto unsupported;
3816
3817 rhs_mode = TYPE_MODE (rhs_type);
3818 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3819 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3820 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3821 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3822 {
3823 cvt_type
3824 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3825 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3826 if (cvt_type == NULL_TREE)
3827 goto unsupported;
3828
3829 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3830 {
3831 if (!supportable_convert_operation (code, vectype_out,
3832 cvt_type, &decl1, &codecvt1))
3833 goto unsupported;
3834 }
3835 else if (!supportable_widening_operation (code, stmt, vectype_out,
3836 cvt_type, &codecvt1,
3837 &codecvt2, &multi_step_cvt,
3838 &interm_types))
3839 continue;
3840 else
3841 gcc_assert (multi_step_cvt == 0);
3842
3843 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3844 vectype_in, &code1, &code2,
3845 &multi_step_cvt, &interm_types))
3846 break;
3847 }
3848
3849 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3850 goto unsupported;
3851
3852 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3853 codecvt2 = ERROR_MARK;
3854 else
3855 {
3856 multi_step_cvt++;
9771b263 3857 interm_types.safe_push (cvt_type);
3858 cvt_type = NULL_TREE;
3859 }
3860 break;
3861
3862 case NARROW:
3863 gcc_assert (op_type == unary_op);
3864 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3865 &code1, &multi_step_cvt,
3866 &interm_types))
3867 break;
3868
3869 if (code != FIX_TRUNC_EXPR
3870 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3871 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3872 goto unsupported;
3873
3874 rhs_mode = TYPE_MODE (rhs_type);
3875 cvt_type
3876 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3877 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3878 if (cvt_type == NULL_TREE)
3879 goto unsupported;
3880 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3881 &decl1, &codecvt1))
3882 goto unsupported;
3883 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3884 &code1, &multi_step_cvt,
3885 &interm_types))
3886 break;
3887 goto unsupported;
3888
3889 default:
3890 gcc_unreachable ();
3891 }
3892
3893 if (!vec_stmt) /* transformation not required. */
3894 {
73fbfcad 3895 if (dump_enabled_p ())
78c60e3d 3896 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3897 "=== vectorizable_conversion ===\n");
4a00c761 3898 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3899 {
3900 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
c3e7ee41 3901 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
8bd37302 3902 }
3903 else if (modifier == NARROW)
3904 {
3905 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
8bd37302 3906 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3907 }
3908 else
3909 {
3910 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
8bd37302 3911 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4a00c761 3912 }
9771b263 3913 interm_types.release ();
3914 return true;
3915 }
3916
3917 /** Transform. **/
73fbfcad 3918 if (dump_enabled_p ())
78c60e3d 3919 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3920 "transform conversion. ncopies = %d.\n", ncopies);
ebfd146a 3921
3922 if (op_type == binary_op)
3923 {
3924 if (CONSTANT_CLASS_P (op0))
3925 op0 = fold_convert (TREE_TYPE (op1), op0);
3926 else if (CONSTANT_CLASS_P (op1))
3927 op1 = fold_convert (TREE_TYPE (op0), op1);
3928 }
3929
3930 /* In case of multi-step conversion, we first generate conversion operations
3931 to the intermediate types, and then from those types to the final one.
3932 We create vector destinations for the intermediate type (TYPES) received
3933 from supportable_*_operation, and store them in the correct order
3934 for future use in vect_create_vectorized_*_stmts (). */
9771b263 3935 vec_dsts.create (multi_step_cvt + 1);
3936 vec_dest = vect_create_destination_var (scalar_dest,
3937 (cvt_type && modifier == WIDEN)
3938 ? cvt_type : vectype_out);
9771b263 3939 vec_dsts.quick_push (vec_dest);
3940
3941 if (multi_step_cvt)
3942 {
3943 for (i = interm_types.length () - 1;
3944 interm_types.iterate (i, &intermediate_type); i--)
3945 {
3946 vec_dest = vect_create_destination_var (scalar_dest,
3947 intermediate_type);
9771b263 3948 vec_dsts.quick_push (vec_dest);
3949 }
3950 }
ebfd146a 3951
4a00c761 3952 if (cvt_type)
3953 vec_dest = vect_create_destination_var (scalar_dest,
3954 modifier == WIDEN
3955 ? vectype_out : cvt_type);
3956
3957 if (!slp_node)
3958 {
30862efc 3959 if (modifier == WIDEN)
4a00c761 3960 {
c3284718 3961 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4a00c761 3962 if (op_type == binary_op)
9771b263 3963 vec_oprnds1.create (1);
4a00c761 3964 }
30862efc 3965 else if (modifier == NARROW)
3966 vec_oprnds0.create (
3967 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3968 }
3969 else if (code == WIDEN_LSHIFT_EXPR)
9771b263 3970 vec_oprnds1.create (slp_node->vec_stmts_size);
ebfd146a 3971
4a00c761 3972 last_oprnd = op0;
3973 prev_stmt_info = NULL;
3974 switch (modifier)
3975 {
3976 case NONE:
3977 for (j = 0; j < ncopies; j++)
3978 {
ebfd146a 3979 if (j == 0)
3980 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3981 -1);
3982 else
3983 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3984
9771b263 3985 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3986 {
3987 /* Arguments are ready, create the new vector stmt. */
3988 if (code1 == CALL_EXPR)
3989 {
3990 new_stmt = gimple_build_call (decl1, 1, vop0);
3991 new_temp = make_ssa_name (vec_dest, new_stmt);
3992 gimple_call_set_lhs (new_stmt, new_temp);
3993 }
3994 else
3995 {
3996 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
0d0e4a03 3997 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3998 new_temp = make_ssa_name (vec_dest, new_stmt);
3999 gimple_assign_set_lhs (new_stmt, new_temp);
4000 }
4001
4002 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4003 if (slp_node)
9771b263 4004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4005 else
4006 {
4007 if (!prev_stmt_info)
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4009 else
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
4012 }
4a00c761 4013 }
4014 }
4015 break;
4016
4017 case WIDEN:
4018 /* In case the vectorization factor (VF) is bigger than the number
4019 of elements that we can fit in a vectype (nunits), we have to
4020 generate more than one vector stmt, i.e. we need to "unroll"
4021 the vector stmt by a factor VF/nunits. */
4022 for (j = 0; j < ncopies; j++)
4023 {
4a00c761 4024 /* Handle uses. */
ebfd146a 4025 if (j == 0)
4026 {
4027 if (slp_node)
4028 {
4029 if (code == WIDEN_LSHIFT_EXPR)
4030 {
4031 unsigned int k;
ebfd146a 4032
4033 vec_oprnd1 = op1;
4034 /* Store vec_oprnd1 for every vector stmt to be created
4035 for SLP_NODE. We check during the analysis that all
4036 the shift arguments are the same. */
4037 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4038 vec_oprnds1.quick_push (vec_oprnd1);
4039
4040 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4041 slp_node, -1);
4042 }
4043 else
4044 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4045 &vec_oprnds1, slp_node, -1);
4046 }
4047 else
4048 {
81c40241 4049 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
9771b263 4050 vec_oprnds0.quick_push (vec_oprnd0);
4051 if (op_type == binary_op)
4052 {
4053 if (code == WIDEN_LSHIFT_EXPR)
4054 vec_oprnd1 = op1;
4055 else
81c40241 4056 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
9771b263 4057 vec_oprnds1.quick_push (vec_oprnd1);
4058 }
4059 }
4060 }
ebfd146a 4061 else
4062 {
4063 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4064 vec_oprnds0.truncate (0);
4065 vec_oprnds0.quick_push (vec_oprnd0);
4066 if (op_type == binary_op)
4067 {
4068 if (code == WIDEN_LSHIFT_EXPR)
4069 vec_oprnd1 = op1;
4070 else
4071 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4072 vec_oprnd1);
4073 vec_oprnds1.truncate (0);
4074 vec_oprnds1.quick_push (vec_oprnd1);
4075 }
4076 }
ebfd146a 4077
4078 /* Arguments are ready. Create the new vector stmts. */
4079 for (i = multi_step_cvt; i >= 0; i--)
4080 {
9771b263 4081 tree this_dest = vec_dsts[i];
4082 enum tree_code c1 = code1, c2 = code2;
4083 if (i == 0 && codecvt2 != ERROR_MARK)
4084 {
4085 c1 = codecvt1;
4086 c2 = codecvt2;
4087 }
4088 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4089 &vec_oprnds1,
4090 stmt, this_dest, gsi,
4091 c1, c2, decl1, decl2,
4092 op_type);
4093 }
4094
9771b263 4095 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4096 {
4097 if (cvt_type)
4098 {
4099 if (codecvt1 == CALL_EXPR)
4100 {
4101 new_stmt = gimple_build_call (decl1, 1, vop0);
4102 new_temp = make_ssa_name (vec_dest, new_stmt);
4103 gimple_call_set_lhs (new_stmt, new_temp);
4104 }
4105 else
4106 {
4107 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4108 new_temp = make_ssa_name (vec_dest);
4109 new_stmt = gimple_build_assign (new_temp, codecvt1,
4110 vop0);
4111 }
4112
4113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4114 }
4115 else
4116 new_stmt = SSA_NAME_DEF_STMT (vop0);
4117
4118 if (slp_node)
9771b263 4119 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4a00c761 4120 else
4121 {
4122 if (!prev_stmt_info)
4123 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4124 else
4125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126 prev_stmt_info = vinfo_for_stmt (new_stmt);
4127 }
4a00c761 4128 }
ebfd146a 4129 }
4130
4131 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4132 break;
4133
4134 case NARROW:
4135 /* In case the vectorization factor (VF) is bigger than the number
4136 of elements that we can fit in a vectype (nunits), we have to
4137 generate more than one vector stmt, i.e. we need to "unroll"
4138 the vector stmt by a factor VF/nunits. */
4139 for (j = 0; j < ncopies; j++)
4140 {
4141 /* Handle uses. */
4142 if (slp_node)
4143 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4144 slp_node, -1);
4145 else
4146 {
9771b263 4147 vec_oprnds0.truncate (0);
4148 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4149 vect_pow2 (multi_step_cvt) - 1);
4150 }
4151
4152 /* Arguments are ready. Create the new vector stmts. */
4153 if (cvt_type)
9771b263 4154 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4155 {
4156 if (codecvt1 == CALL_EXPR)
4157 {
4158 new_stmt = gimple_build_call (decl1, 1, vop0);
4159 new_temp = make_ssa_name (vec_dest, new_stmt);
4160 gimple_call_set_lhs (new_stmt, new_temp);
4161 }
4162 else
4163 {
4164 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4165 new_temp = make_ssa_name (vec_dest);
4166 new_stmt = gimple_build_assign (new_temp, codecvt1,
4167 vop0);
4a00c761 4168 }
ebfd146a 4169
4a00c761 4170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4171 vec_oprnds0[i] = new_temp;
4a00c761 4172 }
ebfd146a 4173
4174 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4175 stmt, vec_dsts, gsi,
4176 slp_node, code1,
4177 &prev_stmt_info);
4178 }
4179
4180 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4a00c761 4181 break;
4182 }
4183
4184 vec_oprnds0.release ();
4185 vec_oprnds1.release ();
4186 vec_dsts.release ();
4187 interm_types.release ();
4188
4189 return true;
4190}
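
/* A sketch of the multi-step WIDEN path above (hypothetical target with
   128-bit vectors): for

     float f = (float) s;    where s is a short

   there is usually no direct V8HI -> V4SF conversion, so CVT_TYPE is int:
   each V8HI operand is first unpacked into two V4SIs (CODE1/CODE2) and
   each V4SI is then converted to V4SF with FLOAT_EXPR (CODECVT1).  */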
4191
4192
4193/* Function vectorizable_assignment.
4194
4195 Check if STMT performs an assignment (copy) that can be vectorized.
4196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4197 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4199
4200static bool
4201vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4202 gimple **vec_stmt, slp_tree slp_node)
4203{
4204 tree vec_dest;
4205 tree scalar_dest;
4206 tree op;
4207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4209 tree new_temp;
355fe088 4210 gimple *def_stmt;
ebfd146a 4211 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
ebfd146a 4212 int ncopies;
f18b55bd 4213 int i, j;
6e1aa848 4214 vec<tree> vec_oprnds = vNULL;
ebfd146a 4215 tree vop;
a70d6342 4216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4217 vec_info *vinfo = stmt_info->vinfo;
355fe088 4218 gimple *new_stmt = NULL;
f18b55bd 4219 stmt_vec_info prev_stmt_info = NULL;
4220 enum tree_code code;
4221 tree vectype_in;
ebfd146a 4222
a70d6342 4223 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4224 return false;
4225
4226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4227 && ! vec_stmt)
4228 return false;
4229
4230 /* Is vectorizable assignment? */
4231 if (!is_gimple_assign (stmt))
4232 return false;
4233
4234 scalar_dest = gimple_assign_lhs (stmt);
4235 if (TREE_CODE (scalar_dest) != SSA_NAME)
4236 return false;
4237
fde9c428 4238 code = gimple_assign_rhs_code (stmt);
ebfd146a 4239 if (gimple_assign_single_p (stmt)
4240 || code == PAREN_EXPR
4241 || CONVERT_EXPR_CODE_P (code))
4242 op = gimple_assign_rhs1 (stmt);
4243 else
4244 return false;
4245
4246 if (code == VIEW_CONVERT_EXPR)
4247 op = TREE_OPERAND (op, 0);
4248
4249 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4250 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4251
4252 /* Multiple types in SLP are handled by creating the appropriate number of
4253 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4254 case of SLP. */
fce57248 4255 if (slp_node)
4256 ncopies = 1;
4257 else
4258 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4259
4260 gcc_assert (ncopies >= 1);
4261
81c40241 4262 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
ebfd146a 4263 {
73fbfcad 4264 if (dump_enabled_p ())
78c60e3d 4265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4266 "use not simple.\n");
4267 return false;
4268 }
4269
4270 /* We can handle NOP_EXPR conversions that do not change the number
4271 of elements or the vector size. */
4272 if ((CONVERT_EXPR_CODE_P (code)
4273 || code == VIEW_CONVERT_EXPR)
4274 && (!vectype_in
4275 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4276 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4277 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4278 return false;
4279
4280 /* We do not handle bit-precision changes. */
4281 if ((CONVERT_EXPR_CODE_P (code)
4282 || code == VIEW_CONVERT_EXPR)
4283 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4284 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4285 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4286 || ((TYPE_PRECISION (TREE_TYPE (op))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4288 /* But a conversion that does not change the bit-pattern is ok. */
4289 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4290 > TYPE_PRECISION (TREE_TYPE (op)))
4291 && TYPE_UNSIGNED (TREE_TYPE (op)))
4292 /* Conversion between boolean types of different sizes is
4293 a simple assignment in case their vectypes are same
4294 boolean vectors. */
4295 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4296 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4297 {
73fbfcad 4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "type conversion to/from bit-precision "
e645e942 4301 "unsupported.\n");
4302 return false;
4303 }
4304
4305 if (!vec_stmt) /* transformation not required. */
4306 {
4307 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
73fbfcad 4308 if (dump_enabled_p ())
78c60e3d 4309 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4310 "=== vectorizable_assignment ===\n");
c3e7ee41 4311 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4312 return true;
4313 }
4314
4315 /** Transform. **/
73fbfcad 4316 if (dump_enabled_p ())
e645e942 4317 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4318
4319 /* Handle def. */
4320 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4321
4322 /* Handle use. */
f18b55bd 4323 for (j = 0; j < ncopies; j++)
ebfd146a 4324 {
4325 /* Handle uses. */
4326 if (j == 0)
d092494c 4327 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4328 else
4329 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4330
4331 /* Arguments are ready. Create the new vector stmt. */
9771b263 4332 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
f18b55bd 4333 {
4334 if (CONVERT_EXPR_CODE_P (code)
4335 || code == VIEW_CONVERT_EXPR)
4a73490d 4336 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4337 new_stmt = gimple_build_assign (vec_dest, vop);
4338 new_temp = make_ssa_name (vec_dest, new_stmt);
4339 gimple_assign_set_lhs (new_stmt, new_temp);
4340 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4341 if (slp_node)
9771b263 4342 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f18b55bd 4343 }
4344
4345 if (slp_node)
4346 continue;
4347
4348 if (j == 0)
4349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4350 else
4351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4352
4353 prev_stmt_info = vinfo_for_stmt (new_stmt);
4354 }
b8698a0f 4355
9771b263 4356 vec_oprnds.release ();
4357 return true;
4358}
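
/* E.g. (a sketch): a conversion that changes neither the element count
   nor the vector size, such as

     unsigned int u = (unsigned int) i;    where i is an int

   is handled above as a plain vector copy through a VIEW_CONVERT_EXPR of
   the operand to the destination vectype.  */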
4359
9dc3f7de 4360
4361/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4362 either as shift by a scalar or by a vector. */
4363
4364bool
4365vect_supportable_shift (enum tree_code code, tree scalar_type)
4366{
4367
ef4bddc2 4368 machine_mode vec_mode;
4369 optab optab;
4370 int icode;
4371 tree vectype;
4372
4373 vectype = get_vectype_for_scalar_type (scalar_type);
4374 if (!vectype)
4375 return false;
4376
4377 optab = optab_for_tree_code (code, vectype, optab_scalar);
4378 if (!optab
4379 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4380 {
4381 optab = optab_for_tree_code (code, vectype, optab_vector);
4382 if (!optab
4383 || (optab_handler (optab, TYPE_MODE (vectype))
4384 == CODE_FOR_nothing))
4385 return false;
4386 }
4387
4388 vec_mode = TYPE_MODE (vectype);
4389 icode = (int) optab_handler (optab, vec_mode);
4390 if (icode == CODE_FOR_nothing)
4391 return false;
4392
4393 return true;
4394}
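
/* Example use (a sketch; the real callers live elsewhere in the
   vectorizer):

     if (vect_supportable_shift (LSHIFT_EXPR, short_integer_type_node))
       ...

   asks whether the target can shift a vector of shorts either by one
   scalar amount (optab_scalar) or by a per-element vector of amounts
   (optab_vector).  */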
4395
4396
4397/* Function vectorizable_shift.
4398
4399 Check if STMT performs a shift operation that can be vectorized.
4400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4401 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4403
4404static bool
4405vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4406 gimple **vec_stmt, slp_tree slp_node)
4407{
4408 tree vec_dest;
4409 tree scalar_dest;
4410 tree op0, op1 = NULL;
4411 tree vec_oprnd1 = NULL_TREE;
4412 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4413 tree vectype;
4414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4415 enum tree_code code;
ef4bddc2 4416 machine_mode vec_mode;
4417 tree new_temp;
4418 optab optab;
4419 int icode;
ef4bddc2 4420 machine_mode optab_op2_mode;
355fe088 4421 gimple *def_stmt;
9dc3f7de 4422 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
355fe088 4423 gimple *new_stmt = NULL;
4424 stmt_vec_info prev_stmt_info;
4425 int nunits_in;
4426 int nunits_out;
4427 tree vectype_out;
cede2577 4428 tree op1_vectype;
4429 int ncopies;
4430 int j, i;
4431 vec<tree> vec_oprnds0 = vNULL;
4432 vec<tree> vec_oprnds1 = vNULL;
4433 tree vop0, vop1;
4434 unsigned int k;
49eab32e 4435 bool scalar_shift_arg = true;
9dc3f7de 4436 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4437 vec_info *vinfo = stmt_info->vinfo;
4438 int vf;
4439
4440 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4441 return false;
4442
4443 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4444 && ! vec_stmt)
4445 return false;
4446
4447 /* Is STMT a vectorizable shift operation? */
4448 if (!is_gimple_assign (stmt))
4449 return false;
4450
4451 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4452 return false;
4453
4454 code = gimple_assign_rhs_code (stmt);
4455
4456 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4457 || code == RROTATE_EXPR))
4458 return false;
4459
4460 scalar_dest = gimple_assign_lhs (stmt);
4461 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4462 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4463 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4464 {
73fbfcad 4465 if (dump_enabled_p ())
78c60e3d 4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4467 "bit-precision shifts not supported.\n");
4468 return false;
4469 }
4470
4471 op0 = gimple_assign_rhs1 (stmt);
81c40241 4472 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 4473 {
73fbfcad 4474 if (dump_enabled_p ())
78c60e3d 4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4476 "use not simple.\n");
4477 return false;
4478 }
4479 /* If op0 is an external or constant def, use a vector type with
4480 the same size as the output vector type. */
4481 if (!vectype)
4482 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4483 if (vec_stmt)
4484 gcc_assert (vectype);
4485 if (!vectype)
4486 {
73fbfcad 4487 if (dump_enabled_p ())
78c60e3d 4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4489 "no vectype for scalar type\n");
4490 return false;
4491 }
4492
4493 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4494 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4495 if (nunits_out != nunits_in)
4496 return false;
4497
4498 op1 = gimple_assign_rhs2 (stmt);
81c40241 4499 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
9dc3f7de 4500 {
73fbfcad 4501 if (dump_enabled_p ())
78c60e3d 4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4503 "use not simple.\n");
4504 return false;
4505 }
4506
4507 if (loop_vinfo)
4508 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4509 else
4510 vf = 1;
4511
4512 /* Multiple types in SLP are handled by creating the appropriate number of
4513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4514 case of SLP. */
fce57248 4515 if (slp_node)
4516 ncopies = 1;
4517 else
4518 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4519
4520 gcc_assert (ncopies >= 1);
4521
4522 /* Determine whether the shift amount is a vector, or scalar. If the
4523 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4524
4525 if ((dt[1] == vect_internal_def
4526 || dt[1] == vect_induction_def)
4527 && !slp_node)
4528 scalar_shift_arg = false;
4529 else if (dt[1] == vect_constant_def
4530 || dt[1] == vect_external_def
4531 || dt[1] == vect_internal_def)
4532 {
4533 /* In SLP, we need to check whether the shift count is the same in
4534 all the stmts; in loops, if it is a constant or invariant, it is
4535 always a scalar shift. */
4536 if (slp_node)
4537 {
4538 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4539 gimple *slpstmt;
49eab32e 4540
9771b263 4541 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4542 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4543 scalar_shift_arg = false;
4544 }
4545
4546 /* If the shift amount is computed by a pattern stmt we cannot
4547 use the scalar amount directly, so give up and use a vector
4548 shift. */
4549 if (dt[1] == vect_internal_def)
4550 {
4551 gimple *def = SSA_NAME_DEF_STMT (op1);
4552 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4553 scalar_shift_arg = false;
4554 }
4555 }
4556 else
4557 {
73fbfcad 4558 if (dump_enabled_p ())
78c60e3d 4559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4560 "operand mode requires invariant argument.\n");
4561 return false;
4562 }
4563
9dc3f7de 4564 /* Vector shifted by vector. */
49eab32e 4565 if (!scalar_shift_arg)
4566 {
4567 optab = optab_for_tree_code (code, vectype, optab_vector);
73fbfcad 4568 if (dump_enabled_p ())
78c60e3d 4569 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4570 "vector/vector shift/rotate found.\n");
78c60e3d 4571
4572 if (!op1_vectype)
4573 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4574 if (op1_vectype == NULL_TREE
4575 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
cede2577 4576 {
73fbfcad 4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4579 "unusable type for last operand in"
e645e942 4580 " vector/vector shift/rotate.\n");
4581 return false;
4582 }
4583 }
4584 /* See if the machine has a vector shifted by scalar insn and if not
4585 then see if it has a vector shifted by vector insn. */
49eab32e 4586 else
4587 {
4588 optab = optab_for_tree_code (code, vectype, optab_scalar);
4589 if (optab
4590 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4591 {
73fbfcad 4592 if (dump_enabled_p ())
78c60e3d 4593 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4594 "vector/scalar shift/rotate found.\n");
4595 }
4596 else
4597 {
4598 optab = optab_for_tree_code (code, vectype, optab_vector);
4599 if (optab
4600 && (optab_handler (optab, TYPE_MODE (vectype))
4601 != CODE_FOR_nothing))
4602 {
4603 scalar_shift_arg = false;
4604
73fbfcad 4605 if (dump_enabled_p ())
78c60e3d 4606 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4607 "vector/vector shift/rotate found.\n");
4608
4609 /* Unlike the other binary operators, shifts/rotates have
4610 the rhs being int, instead of the same type as the lhs,
4611 so make sure the scalar is the right type if we are
aa948027 4612 dealing with vectors of long long/long/short/char. */
4613 if (dt[1] == vect_constant_def)
4614 op1 = fold_convert (TREE_TYPE (vectype), op1);
4615 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4616 TREE_TYPE (op1)))
4617 {
4618 if (slp_node
4619 && TYPE_MODE (TREE_TYPE (vectype))
4620 != TYPE_MODE (TREE_TYPE (op1)))
4621 {
73fbfcad 4622 if (dump_enabled_p ())
4623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4624 "unusable type for last operand in"
e645e942 4625 " vector/vector shift/rotate.\n");
21c0a521 4626 return false;
4627 }
4628 if (vec_stmt && !slp_node)
4629 {
4630 op1 = fold_convert (TREE_TYPE (vectype), op1);
4631 op1 = vect_init_vector (stmt, op1,
4632 TREE_TYPE (vectype), NULL);
4633 }
4634 }
4635 }
4636 }
4637 }
4638
4639 /* Supportable by target? */
4640 if (!optab)
4641 {
73fbfcad 4642 if (dump_enabled_p ())
78c60e3d 4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4644 "no optab.\n");
4645 return false;
4646 }
4647 vec_mode = TYPE_MODE (vectype);
4648 icode = (int) optab_handler (optab, vec_mode);
4649 if (icode == CODE_FOR_nothing)
4650 {
73fbfcad 4651 if (dump_enabled_p ())
78c60e3d 4652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4653 "op not supported by target.\n");
4654 /* Check only during analysis. */
4655 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4656 || (vf < vect_min_worthwhile_factor (code)
4657 && !vec_stmt))
4658 return false;
73fbfcad 4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_NOTE, vect_location,
4661 "proceeding using word mode.\n");
4662 }
4663
4664 /* Worthwhile without SIMD support? Check only during analysis. */
4665 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4666 && vf < vect_min_worthwhile_factor (code)
4667 && !vec_stmt)
4668 {
73fbfcad 4669 if (dump_enabled_p ())
78c60e3d 4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4671 "not worthwhile without SIMD support.\n");
4672 return false;
4673 }
4674
4675 if (!vec_stmt) /* transformation not required. */
4676 {
4677 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
73fbfcad 4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE, vect_location,
4680 "=== vectorizable_shift ===\n");
c3e7ee41 4681 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4682 return true;
4683 }
4684
4685 /** Transform. **/
4686
73fbfcad 4687 if (dump_enabled_p ())
78c60e3d 4688 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4689 "transform binary/unary operation.\n");
4690
4691 /* Handle def. */
4692 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4693
4694 prev_stmt_info = NULL;
4695 for (j = 0; j < ncopies; j++)
4696 {
4697 /* Handle uses. */
4698 if (j == 0)
4699 {
4700 if (scalar_shift_arg)
4701 {
4702 /* Vector shl and shr insn patterns can be defined with scalar
4703 operand 2 (shift operand). In this case, use constant or loop
4704 invariant op1 directly, without extending it to vector mode
4705 first. */
4706 optab_op2_mode = insn_data[icode].operand[2].mode;
4707 if (!VECTOR_MODE_P (optab_op2_mode))
4708 {
73fbfcad 4709 if (dump_enabled_p ())
78c60e3d 4710 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4711 "operand 1 using scalar mode.\n");
9dc3f7de 4712 vec_oprnd1 = op1;
8930f723 4713 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
9771b263 4714 vec_oprnds1.quick_push (vec_oprnd1);
4715 if (slp_node)
4716 {
4717 /* Store vec_oprnd1 for every vector stmt to be created
4718 for SLP_NODE. We check during the analysis that all
4719 the shift arguments are the same.
4720 TODO: Allow different constants for different vector
4721 stmts generated for an SLP instance. */
4722 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
9771b263 4723 vec_oprnds1.quick_push (vec_oprnd1);
4724 }
4725 }
4726 }
4727
4728 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4729 (a special case for certain kinds of vector shifts); otherwise,
4730 operand 1 should be of a vector type (the usual case). */
4731 if (vec_oprnd1)
4732 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
d092494c 4733 slp_node, -1);
4734 else
4735 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
d092494c 4736 slp_node, -1);
4737 }
4738 else
4739 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4740
4741 /* Arguments are ready. Create the new vector stmt. */
9771b263 4742 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
9dc3f7de 4743 {
9771b263 4744 vop1 = vec_oprnds1[i];
0d0e4a03 4745 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4746 new_temp = make_ssa_name (vec_dest, new_stmt);
4747 gimple_assign_set_lhs (new_stmt, new_temp);
4748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4749 if (slp_node)
9771b263 4750 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4751 }
4752
4753 if (slp_node)
4754 continue;
4755
4756 if (j == 0)
4757 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4758 else
4759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4760 prev_stmt_info = vinfo_for_stmt (new_stmt);
4761 }
4762
4763 vec_oprnds0.release ();
4764 vec_oprnds1.release ();
4765
4766 return true;
4767}
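
/* Illustrative contrast for the two operand kinds above (a sketch):

     a[i] = b[i] << 3;       scalar_shift_arg: one invariant count, the
                             vector/scalar shift optab; the constant 3 is
                             usable directly as operand 2 of the insn.
     a[i] = b[i] << c[i];    counts vary per element, so the vector/vector
                             shift optab with a vector shift operand.  */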
4768
4769
4770/* Function vectorizable_operation.
4771
4772 Check if STMT performs a binary, unary or ternary operation that can
4773 be vectorized.
b8698a0f 4774 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4775 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4776 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4777
4778static bool
4779vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4780 gimple **vec_stmt, slp_tree slp_node)
ebfd146a 4781{
00f07b86 4782 tree vec_dest;
ebfd146a 4783 tree scalar_dest;
16949072 4784 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 4785 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
00f07b86 4786 tree vectype;
4787 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4788 enum tree_code code;
ef4bddc2 4789 machine_mode vec_mode;
4790 tree new_temp;
4791 int op_type;
00f07b86 4792 optab optab;
523ba738 4793 bool target_support_p;
355fe088 4794 gimple *def_stmt;
4795 enum vect_def_type dt[3]
4796 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
355fe088 4797 gimple *new_stmt = NULL;
ebfd146a 4798 stmt_vec_info prev_stmt_info;
b690cc0f 4799 int nunits_in;
4800 int nunits_out;
4801 tree vectype_out;
4802 int ncopies;
4803 int j, i;
4804 vec<tree> vec_oprnds0 = vNULL;
4805 vec<tree> vec_oprnds1 = vNULL;
4806 vec<tree> vec_oprnds2 = vNULL;
16949072 4807 tree vop0, vop1, vop2;
a70d6342 4808 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 4809 vec_info *vinfo = stmt_info->vinfo;
4810 int vf;
4811
a70d6342 4812 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4813 return false;
4814
4815 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4816 && ! vec_stmt)
4817 return false;
4818
4819 /* Is STMT a vectorizable binary/unary/ternary operation? */
4820 if (!is_gimple_assign (stmt))
4821 return false;
4822
4823 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4824 return false;
4825
ebfd146a
IR
4826 code = gimple_assign_rhs_code (stmt);
4827
4828 /* For pointer addition, we should use the normal plus for
4829 the vector addition. */
4830 if (code == POINTER_PLUS_EXPR)
4831 code = PLUS_EXPR;
4832
4833 /* Support only unary, binary or ternary operations. */
4834 op_type = TREE_CODE_LENGTH (code);
16949072 4835 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 4836 {
73fbfcad 4837 if (dump_enabled_p ())
78c60e3d 4838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4839 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 4840 op_type);
ebfd146a
IR
4841 return false;
4842 }
4843
b690cc0f
RG
4844 scalar_dest = gimple_assign_lhs (stmt);
4845 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4846
7b7b1813
RG
4847 /* Most operations cannot handle bit-precision types without extra
4848 truncations. */
045c1278
IE
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4851 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
7b7b1813
RG
4852 /* Exceptions are bitwise binary operations. */
4853 && code != BIT_IOR_EXPR
4854 && code != BIT_XOR_EXPR
4855 && code != BIT_AND_EXPR)
4856 {
73fbfcad 4857 if (dump_enabled_p ())
78c60e3d 4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4859 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
4860 return false;
4861 }
4862
ebfd146a 4863 op0 = gimple_assign_rhs1 (stmt);
81c40241 4864 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 4865 {
73fbfcad 4866 if (dump_enabled_p ())
78c60e3d 4867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4868 "use not simple.\n");
ebfd146a
IR
4869 return false;
4870 }
b690cc0f
RG
4871 /* If op0 is an external or constant def, use a vector type with
4872 the same size as the output vector type. */
4873 if (!vectype)
b036c6c5
IE
4874 {
4875 /* For a boolean type we cannot determine the vectype from an
4876 invariant value (we don't know whether it is a vector
4877 of booleans or a vector of integers). We use the output
4878 vectype because operations on booleans don't change
4879 the type. */
4880 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4881 {
4882 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4883 {
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4886 "not supported operation on bool value.\n");
4887 return false;
4888 }
4889 vectype = vectype_out;
4890 }
4891 else
4892 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4893 }
7d8930a0
IR
4894 if (vec_stmt)
4895 gcc_assert (vectype);
4896 if (!vectype)
4897 {
73fbfcad 4898 if (dump_enabled_p ())
7d8930a0 4899 {
78c60e3d
SS
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "no vectype for scalar type ");
4902 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4903 TREE_TYPE (op0));
e645e942 4904 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d8930a0
IR
4905 }
4906
4907 return false;
4908 }
b690cc0f
RG
4909
4910 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4912 if (nunits_out != nunits_in)
4913 return false;
ebfd146a 4914
16949072 4915 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
4916 {
4917 op1 = gimple_assign_rhs2 (stmt);
81c40241 4918 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 4919 {
73fbfcad 4920 if (dump_enabled_p ())
78c60e3d 4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4922 "use not simple.\n");
ebfd146a
IR
4923 return false;
4924 }
4925 }
16949072
RG
4926 if (op_type == ternary_op)
4927 {
4928 op2 = gimple_assign_rhs3 (stmt);
81c40241 4929 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 4930 {
73fbfcad 4931 if (dump_enabled_p ())
78c60e3d 4932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4933 "use not simple.\n");
16949072
RG
4934 return false;
4935 }
4936 }
ebfd146a 4937
b690cc0f
RG
4938 if (loop_vinfo)
4939 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4940 else
4941 vf = 1;
4942
4943 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4944 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 4945 case of SLP. */
fce57248 4946 if (slp_node)
b690cc0f
RG
4947 ncopies = 1;
4948 else
4949 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4950
4951 gcc_assert (ncopies >= 1);
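   /* For example: with VF = 8 and a V4SI vector type (nunits_in = 4)
      this gives ncopies = 8 / 4 = 2, i.e. each scalar stmt is replaced
      by two vector stmts chained via STMT_VINFO_RELATED_STMT.  */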
4952
9dc3f7de 4953 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
4954 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4955 || code == RROTATE_EXPR)
9dc3f7de 4956 return false;
ebfd146a 4957
ebfd146a 4958 /* Supportable by target? */
00f07b86
RH
4959
4960 vec_mode = TYPE_MODE (vectype);
4961 if (code == MULT_HIGHPART_EXPR)
523ba738 4962 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
00f07b86
RH
4963 else
4964 {
4965 optab = optab_for_tree_code (code, vectype, optab_default);
4966 if (!optab)
5deb57cb 4967 {
73fbfcad 4968 if (dump_enabled_p ())
78c60e3d 4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4970 "no optab.\n");
00f07b86 4971 return false;
5deb57cb 4972 }
523ba738
RS
4973 target_support_p = (optab_handler (optab, vec_mode)
4974 != CODE_FOR_nothing);
5deb57cb
JJ
4975 }
4976
523ba738 4977 if (!target_support_p)
ebfd146a 4978 {
73fbfcad 4979 if (dump_enabled_p ())
78c60e3d 4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4981 "op not supported by target.\n");
ebfd146a
IR
4982 /* Check only during analysis. */
4983 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 4984 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 4985 return false;
73fbfcad 4986 if (dump_enabled_p ())
e645e942
TJ
4987 dump_printf_loc (MSG_NOTE, vect_location,
4988 "proceeding using word mode.\n");
383d9c83
IR
4989 }
4990
4a00c761 4991 /* Worthwhile without SIMD support? Check only during analysis. */
5deb57cb
JJ
4992 if (!VECTOR_MODE_P (vec_mode)
4993 && !vec_stmt
4994 && vf < vect_min_worthwhile_factor (code))
7d8930a0 4995 {
73fbfcad 4996 if (dump_enabled_p ())
78c60e3d 4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4998 "not worthwhile without SIMD support.\n");
e34842c6 4999 return false;
7d8930a0 5000 }
ebfd146a 5001
ebfd146a
IR
5002 if (!vec_stmt) /* transformation not required. */
5003 {
4a00c761 5004 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5005 if (dump_enabled_p ())
78c60e3d 5006 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5007 "=== vectorizable_operation ===\n");
c3e7ee41 5008 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
ebfd146a
IR
5009 return true;
5010 }
5011
5012 /** Transform. **/
5013
73fbfcad 5014 if (dump_enabled_p ())
78c60e3d 5015 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5016 "transform binary/unary operation.\n");
383d9c83 5017
ebfd146a 5018 /* Handle def. */
00f07b86 5019 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5020
ebfd146a
IR
5021 /* In case the vectorization factor (VF) is bigger than the number
5022 of elements that we can fit in a vectype (nunits), we have to generate
5023 more than one vector stmt - i.e., we need to "unroll" the
4a00c761
JJ
5024 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5025 from one copy of the vector stmt to the next, in the field
5026 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5027 stages to find the correct vector defs to be used when vectorizing
5028 stmts that use the defs of the current stmt. The example below
5029 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5030 we need to create 4 vectorized stmts):
5031
5032 before vectorization:
5033 RELATED_STMT VEC_STMT
5034 S1: x = memref - -
5035 S2: z = x + 1 - -
5036
5037 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5038 there):
5039 RELATED_STMT VEC_STMT
5040 VS1_0: vx0 = memref0 VS1_1 -
5041 VS1_1: vx1 = memref1 VS1_2 -
5042 VS1_2: vx2 = memref2 VS1_3 -
5043 VS1_3: vx3 = memref3 - -
5044 S1: x = load - VS1_0
5045 S2: z = x + 1 - -
5046
5047 step 2: vectorize stmt S2 (done here):
5048 To vectorize stmt S2 we first need to find the relevant vector
5049 def for the first operand 'x'. This is, as usual, obtained from
5050 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5051 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5052 relevant vector def 'vx0'. Having found 'vx0' we can generate
5053 the vector stmt VS2_0, and as usual, record it in the
5054 STMT_VINFO_VEC_STMT of stmt S2.
5055 When creating the second copy (VS2_1), we obtain the relevant vector
5056 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5057 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5058 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5059 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5060 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5061 chain of stmts and pointers:
5062 RELATED_STMT VEC_STMT
5063 VS1_0: vx0 = memref0 VS1_1 -
5064 VS1_1: vx1 = memref1 VS1_2 -
5065 VS1_2: vx2 = memref2 VS1_3 -
5066 VS1_3: vx3 = memref3 - -
5067 S1: x = load - VS1_0
5068 VS2_0: vz0 = vx0 + v1 VS2_1 -
5069 VS2_1: vz1 = vx1 + v1 VS2_2 -
5070 VS2_2: vz2 = vx2 + v1 VS2_3 -
5071 VS2_3: vz3 = vx3 + v1 - -
5072 S2: z = x + 1 - VS2_0 */
ebfd146a
IR
5073
5074 prev_stmt_info = NULL;
5075 for (j = 0; j < ncopies; j++)
5076 {
5077 /* Handle uses. */
5078 if (j == 0)
4a00c761
JJ
5079 {
5080 if (op_type == binary_op || op_type == ternary_op)
5081 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5082 slp_node, -1);
5083 else
5084 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5085 slp_node, -1);
5086 if (op_type == ternary_op)
36ba4aae 5087 {
9771b263
DN
5088 vec_oprnds2.create (1);
5089 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
81c40241 5090 stmt));
36ba4aae 5091 }
4a00c761 5092 }
ebfd146a 5093 else
4a00c761
JJ
5094 {
5095 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5096 if (op_type == ternary_op)
5097 {
9771b263
DN
5098 tree vec_oprnd = vec_oprnds2.pop ();
5099 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5100 vec_oprnd));
4a00c761
JJ
5101 }
5102 }
5103
5104 /* Arguments are ready. Create the new vector stmt. */
9771b263 5105 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5106 {
4a00c761 5107 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5108 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5109 vop2 = ((op_type == ternary_op)
9771b263 5110 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5111 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4a00c761
JJ
5112 new_temp = make_ssa_name (vec_dest, new_stmt);
5113 gimple_assign_set_lhs (new_stmt, new_temp);
5114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5115 if (slp_node)
9771b263 5116 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
ebfd146a
IR
5117 }
5118
4a00c761
JJ
5119 if (slp_node)
5120 continue;
5121
5122 if (j == 0)
5123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5124 else
5125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5126 prev_stmt_info = vinfo_for_stmt (new_stmt);
ebfd146a
IR
5127 }
5128
9771b263
DN
5129 vec_oprnds0.release ();
5130 vec_oprnds1.release ();
5131 vec_oprnds2.release ();
ebfd146a 5132
ebfd146a
IR
5133 return true;
5134}
5135
c716e67f
XDL
5136/* A helper function to ensure data reference DR's base alignment
5137 for STMT_INFO. */
5138
5139static void
5140ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5141{
5142 if (!dr->aux)
5143 return;
5144
52639a61 5145 if (DR_VECT_AUX (dr)->base_misaligned)
c716e67f
XDL
5146 {
5147 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
52639a61 5148 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5149
428f0c67
JH
5150 if (decl_in_symtab_p (base_decl))
5151 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5152 else
5153 {
fe37c7af 5154 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
428f0c67
JH
5155 DECL_USER_ALIGN (base_decl) = 1;
5156 }
52639a61 5157 DR_VECT_AUX (dr)->base_misaligned = false;
c716e67f
XDL
5158 }
5159}
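/* For example: if DR's base decl is a global array of doubles with the
   default 8-byte alignment and VECTYPE is V2DF (16-byte alignment), the
   decl's alignment is raised to 16 - via the symbol table for symtab
   decls, or by SET_DECL_ALIGN plus DECL_USER_ALIGN otherwise - so that
   subsequent vector accesses to it can be emitted as aligned.  */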
5160
ebfd146a 5161
09dfa495
BM
5162/* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
5163 reversal of the vector elements. If that is impossible to do,
5164 returns NULL_TREE. */
5165
5166static tree
5167perm_mask_for_reverse (tree vectype)
5168{
5169 int i, nunits;
5170 unsigned char *sel;
5171
5172 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5173 sel = XALLOCAVEC (unsigned char, nunits);
5174
5175 for (i = 0; i < nunits; ++i)
5176 sel[i] = nunits - 1 - i;
5177
557be5a8
AL
5178 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5179 return NULL_TREE;
5180 return vect_gen_perm_mask_checked (vectype, sel);
09dfa495
BM
5181}
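/* For example: for V4SI this builds SEL = {3, 2, 1, 0}, so
   VEC_PERM_EXPR <v, v, {3, 2, 1, 0}> yields the element-reversed copy
   of v; NULL_TREE is returned if can_vec_perm_p rejects the selector
   for the vector mode.  */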
5182
ebfd146a
IR
5183/* Function vectorizable_store.
5184
b8698a0f
L
5185 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5186 can be vectorized.
5187 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
5188 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5190
5191static bool
355fe088 5192vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5193 slp_tree slp_node)
ebfd146a
IR
5194{
5195 tree scalar_dest;
5196 tree data_ref;
5197 tree op;
5198 tree vec_oprnd = NULL_TREE;
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5201 tree elem_type;
ebfd146a 5202 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5203 struct loop *loop = NULL;
ef4bddc2 5204 machine_mode vec_mode;
ebfd146a
IR
5205 tree dummy;
5206 enum dr_alignment_support alignment_support_scheme;
355fe088 5207 gimple *def_stmt;
ebfd146a
IR
5208 enum vect_def_type dt;
5209 stmt_vec_info prev_stmt_info = NULL;
5210 tree dataref_ptr = NULL_TREE;
74bf76ed 5211 tree dataref_offset = NULL_TREE;
355fe088 5212 gimple *ptr_incr = NULL;
ebfd146a
IR
5213 int ncopies;
5214 int j;
355fe088 5215 gimple *next_stmt, *first_stmt = NULL;
0d0293ac 5216 bool grouped_store = false;
272c6793 5217 bool store_lanes_p = false;
ebfd146a 5218 unsigned int group_size, i;
6e1aa848
DN
5219 vec<tree> dr_chain = vNULL;
5220 vec<tree> oprnds = vNULL;
5221 vec<tree> result_chain = vNULL;
ebfd146a 5222 bool inv_p;
09dfa495
BM
5223 bool negative = false;
5224 tree offset = NULL_TREE;
6e1aa848 5225 vec<tree> vec_oprnds = vNULL;
ebfd146a 5226 bool slp = (slp_node != NULL);
ebfd146a 5227 unsigned int vec_num;
a70d6342 5228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5229 vec_info *vinfo = stmt_info->vinfo;
272c6793 5230 tree aggr_type;
3bab6342
AT
5231 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5232 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5233 int scatter_scale = 1;
5234 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5235 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5236 gimple *new_stmt;
b17dc4d4 5237 int vf;
a70d6342 5238
a70d6342 5239 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
5240 return false;
5241
66c16fd9
RB
5242 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5243 && ! vec_stmt)
ebfd146a
IR
5244 return false;
5245
5246 /* Is vectorizable store? */
5247
5248 if (!is_gimple_assign (stmt))
5249 return false;
5250
5251 scalar_dest = gimple_assign_lhs (stmt);
ab0ef706
JJ
5252 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5253 && is_pattern_stmt_p (stmt_info))
5254 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5255 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5256 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5257 && TREE_CODE (scalar_dest) != INDIRECT_REF
e9dbe7bb
IR
5258 && TREE_CODE (scalar_dest) != COMPONENT_REF
5259 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
70f34814
RG
5260 && TREE_CODE (scalar_dest) != REALPART_EXPR
5261 && TREE_CODE (scalar_dest) != MEM_REF)
ebfd146a
IR
5262 return false;
5263
fce57248
RS
5264 /* Cannot have hybrid store SLP -- that would mean storing to the
5265 same location twice. */
5266 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5267
ebfd146a 5268 gcc_assert (gimple_assign_single_p (stmt));
465c8c19 5269
f4d09712 5270 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
465c8c19
JJ
5271 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5272
5273 if (loop_vinfo)
b17dc4d4
RB
5274 {
5275 loop = LOOP_VINFO_LOOP (loop_vinfo);
5276 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5277 }
5278 else
5279 vf = 1;
465c8c19
JJ
5280
5281 /* Multiple types in SLP are handled by creating the appropriate number of
5282 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5283 case of SLP. */
fce57248 5284 if (slp)
465c8c19
JJ
5285 ncopies = 1;
5286 else
5287 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5288
5289 gcc_assert (ncopies >= 1);
5290
5291 /* FORNOW. This restriction should be relaxed. */
5292 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5293 {
5294 if (dump_enabled_p ())
5295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5296 "multiple types in nested loop.\n");
5297 return false;
5298 }
5299
ebfd146a 5300 op = gimple_assign_rhs1 (stmt);
f4d09712
KY
5301
5302 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
ebfd146a 5303 {
73fbfcad 5304 if (dump_enabled_p ())
78c60e3d 5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5306 "use not simple.\n");
ebfd146a
IR
5307 return false;
5308 }
5309
f4d09712
KY
5310 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5311 return false;
5312
272c6793 5313 elem_type = TREE_TYPE (vectype);
ebfd146a 5314 vec_mode = TYPE_MODE (vectype);
7b7b1813 5315
ebfd146a
IR
5316 /* FORNOW. In some cases we can vectorize even if the data type is
5317 not supported (e.g. array initialization with 0). */
947131ba 5318 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
5319 return false;
5320
5321 if (!STMT_VINFO_DATA_REF (stmt_info))
5322 return false;
5323
f2e2a985 5324 if (!STMT_VINFO_STRIDED_P (stmt_info))
09dfa495 5325 {
f2e2a985
MM
5326 negative =
5327 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5328 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5329 size_zero_node) < 0;
5330 if (negative && ncopies > 1)
09dfa495
BM
5331 {
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f2e2a985 5334 "multiple types with negative step.\n");
09dfa495
BM
5335 return false;
5336 }
f2e2a985 5337 if (negative)
09dfa495 5338 {
f2e2a985
MM
5339 gcc_assert (!grouped_store);
5340 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5341 if (alignment_support_scheme != dr_aligned
5342 && alignment_support_scheme != dr_unaligned_supported)
5343 {
5344 if (dump_enabled_p ())
5345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5346 "negative step but alignment required.\n");
5347 return false;
5348 }
5349 if (dt != vect_constant_def
5350 && dt != vect_external_def
5351 && !perm_mask_for_reverse (vectype))
5352 {
5353 if (dump_enabled_p ())
5354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5355 "negative step and reversing not supported.\n");
5356 return false;
5357 }
09dfa495
BM
5358 }
5359 }
5360
0d0293ac 5361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5362 {
0d0293ac 5363 grouped_store = true;
e14c1050 5364 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
cee62fee 5365 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
fce57248 5366 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
b602d918 5367 {
272c6793
RS
5368 if (vect_store_lanes_supported (vectype, group_size))
5369 store_lanes_p = true;
0d0293ac 5370 else if (!vect_grouped_store_supported (vectype, group_size))
b602d918
RS
5371 return false;
5372 }
b8698a0f 5373
ebfd146a
IR
5374 if (first_stmt == stmt)
5375 {
5376 /* STMT is the leader of the group. Check the operands of all the
5377 stmts of the group. */
e14c1050 5378 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
ebfd146a
IR
5379 while (next_stmt)
5380 {
5381 gcc_assert (gimple_assign_single_p (next_stmt));
5382 op = gimple_assign_rhs1 (next_stmt);
81c40241 5383 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
ebfd146a 5384 {
73fbfcad 5385 if (dump_enabled_p ())
78c60e3d 5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5387 "use not simple.\n");
ebfd146a
IR
5388 return false;
5389 }
e14c1050 5390 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5391 }
5392 }
5393 }
5394
3bab6342
AT
5395 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5396 {
355fe088 5397 gimple *def_stmt;
3bab6342
AT
5398 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5399 &scatter_off, &scatter_scale);
5400 gcc_assert (scatter_decl);
81c40241
RB
5401 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5402 &scatter_off_vectype))
3bab6342
AT
5403 {
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "scatter index use not simple.");
5407 return false;
5408 }
5409 }
5410
ebfd146a
IR
5411 if (!vec_stmt) /* transformation not required. */
5412 {
5413 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2e8ab70c
RB
5414 /* The SLP costs are calculated during SLP analysis. */
5415 if (!PURE_SLP_STMT (stmt_info))
5416 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5417 NULL, NULL, NULL);
ebfd146a
IR
5418 return true;
5419 }
5420
5421 /** Transform. **/
5422
c716e67f
XDL
5423 ensure_base_align (stmt_info, dr);
5424
3bab6342
AT
5425 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5426 {
5427 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5428 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5429 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5430 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5431 edge pe = loop_preheader_edge (loop);
5432 gimple_seq seq;
5433 basic_block new_bb;
5434 enum { NARROW, NONE, WIDEN } modifier;
5435 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5436
5437 if (nunits == (unsigned int) scatter_off_nunits)
5438 modifier = NONE;
5439 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5440 {
5441 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5442 modifier = WIDEN;
5443
5444 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5445 sel[i] = i | nunits;
5446
5447 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5448 gcc_assert (perm_mask != NULL_TREE);
5449 }
5450 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5451 {
5452 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5453 modifier = NARROW;
5454
5455 for (i = 0; i < (unsigned int) nunits; ++i)
5456 sel[i] = i | scatter_off_nunits;
5457
5458 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5459 gcc_assert (perm_mask != NULL_TREE);
5460 ncopies *= 2;
5461 }
5462 else
5463 gcc_unreachable ();
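      /* A worked instance (illustrative types): scattering V4DI data with
	 V8SI offsets gives modifier = WIDEN and sel = {4,5,6,7,4,5,6,7},
	 so the odd copies reuse the upper half of the offset vector;
	 scattering V8SI data with V4DI offsets gives modifier = NARROW
	 with the analogous selector applied to the data vector, and
	 ncopies is doubled because each data vector is scattered in two
	 halves.  */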
5464
5465 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5466 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5467 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5468 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5469 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5470 scaletype = TREE_VALUE (arglist);
5471
5472 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5473 && TREE_CODE (rettype) == VOID_TYPE);
5474
5475 ptr = fold_convert (ptrtype, scatter_base);
5476 if (!is_gimple_min_invariant (ptr))
5477 {
5478 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5479 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5480 gcc_assert (!new_bb);
5481 }
5482
5483 /* Currently we support only unconditional scatter stores,
5484 so mask should be all ones. */
5485 mask = build_int_cst (masktype, -1);
5486 mask = vect_init_vector (stmt, mask, masktype, NULL);
5487
5488 scale = build_int_cst (scaletype, scatter_scale);
5489
5490 prev_stmt_info = NULL;
5491 for (j = 0; j < ncopies; ++j)
5492 {
5493 if (j == 0)
5494 {
5495 src = vec_oprnd1
81c40241 5496 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
3bab6342 5497 op = vec_oprnd0
81c40241 5498 = vect_get_vec_def_for_operand (scatter_off, stmt);
3bab6342
AT
5499 }
5500 else if (modifier != NONE && (j & 1))
5501 {
5502 if (modifier == WIDEN)
5503 {
5504 src = vec_oprnd1
5505 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5506 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5507 stmt, gsi);
5508 }
5509 else if (modifier == NARROW)
5510 {
5511 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5512 stmt, gsi);
5513 op = vec_oprnd0
5514 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5515 }
5516 else
5517 gcc_unreachable ();
5518 }
5519 else
5520 {
5521 src = vec_oprnd1
5522 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5523 op = vec_oprnd0
5524 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5525 }
5526
5527 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5528 {
5529 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5530 == TYPE_VECTOR_SUBPARTS (srctype));
0e22bb5a 5531 var = vect_get_new_ssa_name (srctype, vect_simple_var);
3bab6342
AT
5532 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5533 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5535 src = var;
5536 }
5537
5538 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5539 {
5540 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5541 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 5542 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3bab6342
AT
5543 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5544 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5545 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5546 op = var;
5547 }
5548
5549 new_stmt
5550 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5551
5552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5553
5554 if (prev_stmt_info == NULL)
5555 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5556 else
5557 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5558 prev_stmt_info = vinfo_for_stmt (new_stmt);
5559 }
5560 return true;
5561 }
5562
0d0293ac 5563 if (grouped_store)
ebfd146a
IR
5564 {
5565 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5566 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5567
e14c1050 5568 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
ebfd146a
IR
5569
5570 /* FORNOW */
a70d6342 5571 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
ebfd146a
IR
5572
5573 /* We vectorize all the stmts of the interleaving group when we
5574 reach the last stmt in the group. */
e14c1050
IR
5575 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5576 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
ebfd146a
IR
5577 && !slp)
5578 {
5579 *vec_stmt = NULL;
5580 return true;
5581 }
5582
5583 if (slp)
4b5caab7 5584 {
0d0293ac 5585 grouped_store = false;
4b5caab7
IR
5586 /* VEC_NUM is the number of vect stmts to be created for this
5587 group. */
5588 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5589 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 5590 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 5591 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5592 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 5593 }
ebfd146a 5594 else
4b5caab7
IR
5595 /* VEC_NUM is the number of vect stmts to be created for this
5596 group. */
ebfd146a
IR
5597 vec_num = group_size;
5598 }
b8698a0f 5599 else
ebfd146a
IR
5600 {
5601 first_stmt = stmt;
5602 first_dr = dr;
5603 group_size = vec_num = 1;
ebfd146a 5604 }
b8698a0f 5605
73fbfcad 5606 if (dump_enabled_p ())
78c60e3d 5607 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5608 "transform store. ncopies = %d\n", ncopies);
ebfd146a 5609
f2e2a985
MM
5610 if (STMT_VINFO_STRIDED_P (stmt_info))
5611 {
5612 gimple_stmt_iterator incr_gsi;
5613 bool insert_after;
355fe088 5614 gimple *incr;
f2e2a985
MM
5615 tree offvar;
5616 tree ivstep;
5617 tree running_off;
5618 gimple_seq stmts = NULL;
5619 tree stride_base, stride_step, alias_off;
5620 tree vec_oprnd;
f502d50e 5621 unsigned int g;
f2e2a985
MM
5622
5623 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5624
5625 stride_base
5626 = fold_build_pointer_plus
f502d50e 5627 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 5628 size_binop (PLUS_EXPR,
f502d50e
MM
5629 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5630 convert_to_ptrofftype (DR_INIT (first_dr))));
5631 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
f2e2a985
MM
5632
5633 /* For a store with loop-invariant (but other than power-of-2)
5634 stride (i.e. not a grouped access) like so:
5635
5636 for (i = 0; i < n; i += stride)
5637 array[i] = ...;
5638
5639 we generate a new induction variable and new stores from
5640 the components of the (vectorized) rhs:
5641
5642 for (j = 0; ; j += VF*stride)
5643 vectemp = ...;
5644 tmp1 = vectemp[0];
5645 array[j] = tmp1;
5646 tmp2 = vectemp[1];
5647 array[j + stride] = tmp2;
5648 ...
5649 */
5650
cee62fee 5651 unsigned nstores = nunits;
b17dc4d4 5652 unsigned lnel = 1;
cee62fee
MM
5653 tree ltype = elem_type;
5654 if (slp)
5655 {
b17dc4d4
RB
5656 if (group_size < nunits
5657 && nunits % group_size == 0)
5658 {
5659 nstores = nunits / group_size;
5660 lnel = group_size;
5661 ltype = build_vector_type (elem_type, group_size);
5662 }
5663 else if (group_size >= nunits
5664 && group_size % nunits == 0)
5665 {
5666 nstores = 1;
5667 lnel = nunits;
5668 ltype = vectype;
5669 }
cee62fee
MM
5670 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5671 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5672 }
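	 /* A worked instance: with group_size = 2 and V8SI (nunits = 8),
	    nstores = 4, lnel = 2 and ltype is a two-element vector, so each
	    vector is stored as four two-element pieces; with group_size = 16
	    and V8SI, nstores = 1 and whole vectors are stored.  */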
5673
f2e2a985
MM
5674 ivstep = stride_step;
5675 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
b17dc4d4 5676 build_int_cst (TREE_TYPE (ivstep), vf));
f2e2a985
MM
5677
5678 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5679
5680 create_iv (stride_base, ivstep, NULL,
5681 loop, &incr_gsi, insert_after,
5682 &offvar, NULL);
5683 incr = gsi_stmt (incr_gsi);
310213d4 5684 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
f2e2a985
MM
5685
5686 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5687 if (stmts)
5688 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5689
5690 prev_stmt_info = NULL;
f502d50e
MM
5691 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5692 next_stmt = first_stmt;
5693 for (g = 0; g < group_size; g++)
f2e2a985 5694 {
f502d50e
MM
5695 running_off = offvar;
5696 if (g)
f2e2a985 5697 {
f502d50e
MM
5698 tree size = TYPE_SIZE_UNIT (ltype);
5699 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 5700 size);
f502d50e 5701 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 5702 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 5703 running_off, pos);
f2e2a985 5704 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 5705 running_off = newoff;
f502d50e 5706 }
b17dc4d4
RB
5707 unsigned int group_el = 0;
5708 unsigned HOST_WIDE_INT
5709 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f502d50e
MM
5710 for (j = 0; j < ncopies; j++)
5711 {
5712 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5713 and first_stmt == stmt. */
5714 if (j == 0)
5715 {
5716 if (slp)
5717 {
5718 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5719 slp_node, -1);
5720 vec_oprnd = vec_oprnds[0];
5721 }
5722 else
5723 {
5724 gcc_assert (gimple_assign_single_p (next_stmt));
5725 op = gimple_assign_rhs1 (next_stmt);
81c40241 5726 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
f502d50e
MM
5727 }
5728 }
f2e2a985 5729 else
f502d50e
MM
5730 {
5731 if (slp)
5732 vec_oprnd = vec_oprnds[j];
5733 else
c079cbac 5734 {
81c40241 5735 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
c079cbac
RB
5736 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5737 }
f502d50e
MM
5738 }
5739
5740 for (i = 0; i < nstores; i++)
5741 {
5742 tree newref, newoff;
355fe088 5743 gimple *incr, *assign;
f502d50e
MM
5744 tree size = TYPE_SIZE (ltype);
5745 /* Extract the i'th component. */
5746 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5747 bitsize_int (i), size);
5748 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5749 size, pos);
5750
5751 elem = force_gimple_operand_gsi (gsi, elem, true,
5752 NULL_TREE, true,
5753 GSI_SAME_STMT);
5754
b17dc4d4
RB
5755 tree this_off = build_int_cst (TREE_TYPE (alias_off),
5756 group_el * elsz);
f502d50e 5757 newref = build2 (MEM_REF, ltype,
b17dc4d4 5758 running_off, this_off);
f502d50e
MM
5759
5760 /* And store it to *running_off. */
5761 assign = gimple_build_assign (newref, elem);
5762 vect_finish_stmt_generation (stmt, assign, gsi);
5763
b17dc4d4
RB
5764 group_el += lnel;
5765 if (! slp
5766 || group_el == group_size)
5767 {
5768 newoff = copy_ssa_name (running_off, NULL);
5769 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5770 running_off, stride_step);
5771 vect_finish_stmt_generation (stmt, incr, gsi);
f502d50e 5772
b17dc4d4
RB
5773 running_off = newoff;
5774 group_el = 0;
5775 }
225ce44b
RB
5776 if (g == group_size - 1
5777 && !slp)
f502d50e
MM
5778 {
5779 if (j == 0 && i == 0)
225ce44b
RB
5780 STMT_VINFO_VEC_STMT (stmt_info)
5781 = *vec_stmt = assign;
f502d50e
MM
5782 else
5783 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5784 prev_stmt_info = vinfo_for_stmt (assign);
5785 }
5786 }
f2e2a985 5787 }
f502d50e 5788 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b17dc4d4
RB
5789 if (slp)
5790 break;
f2e2a985
MM
5791 }
5792 return true;
5793 }
5794
9771b263
DN
5795 dr_chain.create (group_size);
5796 oprnds.create (group_size);
ebfd146a 5797
720f5239 5798 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 5799 gcc_assert (alignment_support_scheme);
272c6793
RS
5800 /* Targets with store-lane instructions must not require explicit
5801 realignment. */
5802 gcc_assert (!store_lanes_p
5803 || alignment_support_scheme == dr_aligned
5804 || alignment_support_scheme == dr_unaligned_supported);
5805
09dfa495
BM
5806 if (negative)
5807 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5808
272c6793
RS
5809 if (store_lanes_p)
5810 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5811 else
5812 aggr_type = vectype;
ebfd146a
IR
5813
5814 /* In case the vectorization factor (VF) is bigger than the number
5815 of elements that we can fit in a vectype (nunits), we have to generate
5816 more than one vector stmt - i.e., we need to "unroll" the
b8698a0f 5817 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
5818 vect_get_vec_def_for_copy_stmt. */
5819
0d0293ac 5820 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
5821
5822 S1: &base + 2 = x2
5823 S2: &base = x0
5824 S3: &base + 1 = x1
5825 S4: &base + 3 = x3
5826
5827 We create vectorized stores starting from base address (the access of the
5828 first stmt in the chain (S2 in the above example), when the last store stmt
5829 of the chain (S4) is reached:
5830
5831 VS1: &base = vx2
5832 VS2: &base + vec_size*1 = vx0
5833 VS3: &base + vec_size*2 = vx1
5834 VS4: &base + vec_size*3 = vx3
5835
5836 Then permutation statements are generated:
5837
3fcc1b55
JJ
5838 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5839 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 5840 ...
b8698a0f 5841
ebfd146a
IR
5842 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5843 (the order of the data-refs in the output of vect_permute_store_chain
5844 corresponds to the order of scalar stmts in the interleaving chain - see
5845 the documentation of vect_permute_store_chain()).
5846
5847 In case of both multiple types and interleaving, above vector stores and
ff802fa1 5848 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 5849 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 5850 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
5851 */
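   /* In the VS5/VS6 selectors above, indices 0..7 select lanes of the
      first input and 8..15 lanes of the second, so VS5 interleaves the
      low halves of the two input vectors and VS6 their high halves.  */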
5852
5853 prev_stmt_info = NULL;
5854 for (j = 0; j < ncopies; j++)
5855 {
ebfd146a
IR
5856
5857 if (j == 0)
5858 {
5859 if (slp)
5860 {
5861 /* Get vectorized arguments for SLP_NODE. */
d092494c
IR
5862 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5863 NULL, slp_node, -1);
ebfd146a 5864
9771b263 5865 vec_oprnd = vec_oprnds[0];
ebfd146a
IR
5866 }
5867 else
5868 {
b8698a0f
L
5869 /* For interleaved stores we collect vectorized defs for all the
5870 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5871 used as an input to vect_permute_store_chain(), and OPRNDS as
ebfd146a
IR
5872 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5873
0d0293ac 5874 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 5875 OPRNDS are of size 1. */
b8698a0f 5876 next_stmt = first_stmt;
ebfd146a
IR
5877 for (i = 0; i < group_size; i++)
5878 {
b8698a0f
L
5879 /* Since gaps are not supported for interleaved stores,
5880 GROUP_SIZE is the exact number of stmts in the chain.
5881 Therefore, NEXT_STMT can't be NULL_TREE. In case
5882 there is no interleaving, GROUP_SIZE is 1, and only one
ebfd146a
IR
5883 iteration of the loop will be executed. */
5884 gcc_assert (next_stmt
5885 && gimple_assign_single_p (next_stmt));
5886 op = gimple_assign_rhs1 (next_stmt);
5887
81c40241 5888 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
9771b263
DN
5889 dr_chain.quick_push (vec_oprnd);
5890 oprnds.quick_push (vec_oprnd);
e14c1050 5891 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
ebfd146a
IR
5892 }
5893 }
5894
5895 /* We should have caught mismatched types earlier. */
5896 gcc_assert (useless_type_conversion_p (vectype,
5897 TREE_TYPE (vec_oprnd)));
74bf76ed
JJ
5898 bool simd_lane_access_p
5899 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5900 if (simd_lane_access_p
5901 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5902 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5903 && integer_zerop (DR_OFFSET (first_dr))
5904 && integer_zerop (DR_INIT (first_dr))
5905 && alias_sets_conflict_p (get_alias_set (aggr_type),
5906 get_alias_set (DR_REF (first_dr))))
5907 {
5908 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5909 dataref_offset = build_int_cst (reference_alias_ptr_type
5910 (DR_REF (first_dr)), 0);
8928eff3 5911 inv_p = false;
74bf76ed
JJ
5912 }
5913 else
5914 dataref_ptr
5915 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5916 simd_lane_access_p ? loop : NULL,
09dfa495 5917 offset, &dummy, gsi, &ptr_incr,
74bf76ed 5918 simd_lane_access_p, &inv_p);
a70d6342 5919 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 5920 }
b8698a0f 5921 else
ebfd146a 5922 {
b8698a0f
L
5923 /* For interleaved stores we created vectorized defs for all the
5924 defs stored in OPRNDS in the previous iteration (previous copy).
5925 DR_CHAIN is then used as an input to vect_permute_store_chain(),
ebfd146a
IR
5926 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5927 next copy.
0d0293ac 5928 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a
IR
5929 OPRNDS are of size 1. */
5930 for (i = 0; i < group_size; i++)
5931 {
9771b263 5932 op = oprnds[i];
81c40241 5933 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 5934 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
9771b263
DN
5935 dr_chain[i] = vec_oprnd;
5936 oprnds[i] = vec_oprnd;
ebfd146a 5937 }
74bf76ed
JJ
5938 if (dataref_offset)
5939 dataref_offset
5940 = int_const_binop (PLUS_EXPR, dataref_offset,
5941 TYPE_SIZE_UNIT (aggr_type));
5942 else
5943 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5944 TYPE_SIZE_UNIT (aggr_type));
ebfd146a
IR
5945 }
5946
272c6793 5947 if (store_lanes_p)
ebfd146a 5948 {
272c6793 5949 tree vec_array;
267d3070 5950
272c6793
RS
5951 /* Combine all the vectors into an array. */
5952 vec_array = create_vector_array (vectype, vec_num);
5953 for (i = 0; i < vec_num; i++)
c2d7ab2a 5954 {
9771b263 5955 vec_oprnd = dr_chain[i];
272c6793 5956 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 5957 }
b8698a0f 5958
272c6793
RS
5959 /* Emit:
5960 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5961 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5962 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5963 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 5964 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
5965 }
5966 else
5967 {
5968 new_stmt = NULL;
0d0293ac 5969 if (grouped_store)
272c6793 5970 {
b6b9227d
JJ
5971 if (j == 0)
5972 result_chain.create (group_size);
272c6793
RS
5973 /* Permute. */
5974 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5975 &result_chain);
5976 }
c2d7ab2a 5977
272c6793
RS
5978 next_stmt = first_stmt;
5979 for (i = 0; i < vec_num; i++)
5980 {
644ffefd 5981 unsigned align, misalign;
272c6793
RS
5982
5983 if (i > 0)
5984 /* Bump the vector pointer. */
5985 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5986 stmt, NULL_TREE);
5987
5988 if (slp)
9771b263 5989 vec_oprnd = vec_oprnds[i];
0d0293ac
MM
5990 else if (grouped_store)
5991 /* For grouped stores vectorized defs are interleaved in
272c6793 5992 vect_permute_store_chain(). */
9771b263 5993 vec_oprnd = result_chain[i];
272c6793 5994
aed93b23
RB
5995 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5996 dataref_ptr,
5997 dataref_offset
5998 ? dataref_offset
5999 : build_int_cst (reference_alias_ptr_type
6000 (DR_REF (first_dr)), 0));
644ffefd 6001 align = TYPE_ALIGN_UNIT (vectype);
272c6793 6002 if (aligned_access_p (first_dr))
644ffefd 6003 misalign = 0;
272c6793
RS
6004 else if (DR_MISALIGNMENT (first_dr) == -1)
6005 {
52639a61
RB
6006 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6007 align = TYPE_ALIGN_UNIT (elem_type);
6008 else
6009 align = get_object_alignment (DR_REF (first_dr))
6010 / BITS_PER_UNIT;
6011 misalign = 0;
272c6793
RS
6012 TREE_TYPE (data_ref)
6013 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 6014 align * BITS_PER_UNIT);
272c6793
RS
6015 }
6016 else
6017 {
6018 TREE_TYPE (data_ref)
6019 = build_aligned_type (TREE_TYPE (data_ref),
6020 TYPE_ALIGN (elem_type));
644ffefd 6021 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6022 }
aed93b23
RB
6023 if (dataref_offset == NULL_TREE
6024 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
6025 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6026 misalign);
c2d7ab2a 6027
f234d260
BM
6028 if (negative
6029 && dt != vect_constant_def
6030 && dt != vect_external_def)
09dfa495
BM
6031 {
6032 tree perm_mask = perm_mask_for_reverse (vectype);
6033 tree perm_dest
6034 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6035 vectype);
b731b390 6036 tree new_temp = make_ssa_name (perm_dest);
09dfa495
BM
6037
6038 /* Generate the permute statement. */
355fe088 6039 gimple *perm_stmt
0d0e4a03
JJ
6040 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6041 vec_oprnd, perm_mask);
09dfa495
BM
6042 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6043
6044 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6045 vec_oprnd = new_temp;
6046 }
6047
272c6793
RS
6048 /* Arguments are ready. Create the new vector stmt. */
6049 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6050 vect_finish_stmt_generation (stmt, new_stmt, gsi);
272c6793
RS
6051
6052 if (slp)
6053 continue;
6054
e14c1050 6055 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
272c6793
RS
6056 if (!next_stmt)
6057 break;
6058 }
ebfd146a 6059 }
1da0876c
RS
6060 if (!slp)
6061 {
6062 if (j == 0)
6063 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6064 else
6065 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6066 prev_stmt_info = vinfo_for_stmt (new_stmt);
6067 }
ebfd146a
IR
6068 }
6069
9771b263
DN
6070 dr_chain.release ();
6071 oprnds.release ();
6072 result_chain.release ();
6073 vec_oprnds.release ();
ebfd146a
IR
6074
6075 return true;
6076}
6077
557be5a8
AL
6078/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6079 VECTOR_CST mask. No checks are made that the target platform supports the
6080 mask, so callers may wish to test can_vec_perm_p separately, or use
6081 vect_gen_perm_mask_checked. */
a1e53f3f 6082
3fcc1b55 6083tree
557be5a8 6084vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
a1e53f3f 6085{
d2a12ae7 6086 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 6087 int i, nunits;
a1e53f3f 6088
22e4dee7 6089 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 6090
96f9265a
RG
6091 mask_elt_type = lang_hooks.types.type_for_mode
6092 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 6093 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 6094
d2a12ae7 6095 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 6096 for (i = nunits - 1; i >= 0; i--)
d2a12ae7
RG
6097 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6098 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 6099
2635892a 6100 return mask_vec;
a1e53f3f
L
6101}
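/* For example: for a V4SF VECTYPE the mask elements are built in the
   32-bit integer type matching SFmode's size, so SEL = {3,2,1,0} becomes
   the V4SI VECTOR_CST {3, 2, 1, 0}, ready for use as the third operand
   of a VEC_PERM_EXPR.  */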
6102
cf7aa6a3
AL
6103/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6104 i.e. that the target supports the pattern _for arbitrary input vectors_. */
557be5a8
AL
6105
6106tree
6107vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6108{
6109 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6110 return vect_gen_perm_mask_any (vectype, sel);
6111}
6112
aec7ae7d
JJ
6113/* Given vector variables X and Y that were generated for the scalar
6114 STMT, generate instructions to permute the vector elements of X and Y
6115 using permutation mask MASK_VEC, insert them at *GSI and return the
6116 permuted vector variable. */
a1e53f3f
L
6117
6118static tree
355fe088 6119permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6120 gimple_stmt_iterator *gsi)
a1e53f3f
L
6121{
6122 tree vectype = TREE_TYPE (x);
aec7ae7d 6123 tree perm_dest, data_ref;
355fe088 6124 gimple *perm_stmt;
a1e53f3f 6125
acdcd61b 6126 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 6127 data_ref = make_ssa_name (perm_dest);
a1e53f3f
L
6128
6129 /* Generate the permute statement. */
0d0e4a03 6130 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
a1e53f3f
L
6131 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6132
6133 return data_ref;
6134}
6135
6b916b36
RB
6136/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6137 inserting them on the loop's preheader edge. Returns true if we
6138 were successful in doing so (and thus STMT can then be moved),
6139 otherwise returns false. */
6140
6141static bool
355fe088 6142hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6b916b36
RB
6143{
6144 ssa_op_iter i;
6145 tree op;
6146 bool any = false;
6147
6148 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6149 {
355fe088 6150 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6151 if (!gimple_nop_p (def_stmt)
6152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6153 {
6154 /* Make sure we don't need to recurse. While we could do
6155 so in simple cases, for more complex use webs
6156 we don't have an easy way to preserve stmt order to fulfil
6157 dependencies within them. */
6158 tree op2;
6159 ssa_op_iter i2;
d1417442
JJ
6160 if (gimple_code (def_stmt) == GIMPLE_PHI)
6161 return false;
6b916b36
RB
6162 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6163 {
355fe088 6164 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6b916b36
RB
6165 if (!gimple_nop_p (def_stmt2)
6166 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6167 return false;
6168 }
6169 any = true;
6170 }
6171 }
6172
6173 if (!any)
6174 return true;
6175
6176 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6177 {
355fe088 6178 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6b916b36
RB
6179 if (!gimple_nop_p (def_stmt)
6180 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6181 {
6182 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6183 gsi_remove (&gsi, false);
6184 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6185 }
6186 }
6187
6188 return true;
6189}
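/* For instance (illustrative SSA names): if STMT is _5 = MEM[_4] and
   _4 = p_1 + 16 is defined inside LOOP while p_1 is defined outside it,
   the definition of _4 is moved to the preheader and true is returned,
   after which the caller may move STMT itself.  A PHI def, or a def
   whose own operands are defined inside the loop, makes us give up.  */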
6190
ebfd146a
IR
6191/* vectorizable_load.
6192
b8698a0f
L
6193 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6194 can be vectorized.
6195 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6196 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6197 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6198
6199static bool
355fe088 6200vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6201 slp_tree slp_node, slp_instance slp_node_instance)
ebfd146a
IR
6202{
6203 tree scalar_dest;
6204 tree vec_dest = NULL;
6205 tree data_ref = NULL;
6206 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6207 stmt_vec_info prev_stmt_info;
ebfd146a 6208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6209 struct loop *loop = NULL;
ebfd146a 6210 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6211 bool nested_in_vect_loop = false;
c716e67f 6212 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6213 tree elem_type;
ebfd146a 6214 tree new_temp;
ef4bddc2 6215 machine_mode mode;
355fe088 6216 gimple *new_stmt = NULL;
ebfd146a
IR
6217 tree dummy;
6218 enum dr_alignment_support alignment_support_scheme;
6219 tree dataref_ptr = NULL_TREE;
74bf76ed 6220 tree dataref_offset = NULL_TREE;
355fe088 6221 gimple *ptr_incr = NULL;
ebfd146a 6222 int ncopies;
9b999e8c 6223 int i, j, group_size = -1, group_gap_adj;
ebfd146a
IR
6224 tree msq = NULL_TREE, lsq;
6225 tree offset = NULL_TREE;
356bbc4c 6226 tree byte_offset = NULL_TREE;
ebfd146a 6227 tree realignment_token = NULL_TREE;
538dd0b7 6228 gphi *phi = NULL;
6e1aa848 6229 vec<tree> dr_chain = vNULL;
0d0293ac 6230 bool grouped_load = false;
272c6793 6231 bool load_lanes_p = false;
355fe088 6232 gimple *first_stmt;
4f0a0218 6233 gimple *first_stmt_for_drptr = NULL;
ebfd146a 6234 bool inv_p;
319e6439 6235 bool negative = false;
ebfd146a
IR
6236 bool compute_in_loop = false;
6237 struct loop *at_loop;
6238 int vec_num;
6239 bool slp = (slp_node != NULL);
6240 bool slp_perm = false;
6241 enum tree_code code;
a70d6342
IR
6242 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6243 int vf;
272c6793 6244 tree aggr_type;
aec7ae7d
JJ
6245 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6246 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6247 int gather_scale = 1;
6248 enum vect_def_type gather_dt = vect_unknown_def_type;
310213d4 6249 vec_info *vinfo = stmt_info->vinfo;
a70d6342 6250
465c8c19
JJ
6251 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6252 return false;
6253
66c16fd9
RB
6254 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6255 && ! vec_stmt)
465c8c19
JJ
6256 return false;
6257
6258 /* Is vectorizable load? */
6259 if (!is_gimple_assign (stmt))
6260 return false;
6261
6262 scalar_dest = gimple_assign_lhs (stmt);
6263 if (TREE_CODE (scalar_dest) != SSA_NAME)
6264 return false;
6265
6266 code = gimple_assign_rhs_code (stmt);
6267 if (code != ARRAY_REF
6268 && code != BIT_FIELD_REF
6269 && code != INDIRECT_REF
6270 && code != COMPONENT_REF
6271 && code != IMAGPART_EXPR
6272 && code != REALPART_EXPR
6273 && code != MEM_REF
6274 && TREE_CODE_CLASS (code) != tcc_declaration)
6275 return false;
6276
6277 if (!STMT_VINFO_DATA_REF (stmt_info))
6278 return false;
6279
6280 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6281 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6282
a70d6342
IR
6283 if (loop_vinfo)
6284 {
6285 loop = LOOP_VINFO_LOOP (loop_vinfo);
6286 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6287 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6288 }
6289 else
3533e503 6290 vf = 1;
ebfd146a
IR
6291
6292 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6293 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6294 case of SLP. */
fce57248 6295 if (slp)
ebfd146a
IR
6296 ncopies = 1;
6297 else
6298 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6299
6300 gcc_assert (ncopies >= 1);
6301
6302 /* FORNOW. This restriction should be relaxed. */
6303 if (nested_in_vect_loop && ncopies > 1)
6304 {
73fbfcad 6305 if (dump_enabled_p ())
78c60e3d 6306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6307 "multiple types in nested loop.\n");
ebfd146a
IR
6308 return false;
6309 }
6310
f2556b68
RB
6311 /* Invalidate assumptions made by dependence analysis when vectorization
6312 on the unrolled body effectively re-orders stmts. */
6313 if (ncopies > 1
6314 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6315 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6316 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6317 {
6318 if (dump_enabled_p ())
6319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6320 "cannot perform implicit CSE when unrolling "
6321 "with negative dependence distance\n");
6322 return false;
6323 }
6324
7b7b1813 6325 elem_type = TREE_TYPE (vectype);
947131ba 6326 mode = TYPE_MODE (vectype);
ebfd146a
IR
6327
6328 /* FORNOW. In some cases we can vectorize even if the data type is
6329 not supported (e.g. data copies). */
947131ba 6330 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6331 {
73fbfcad 6332 if (dump_enabled_p ())
78c60e3d 6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6334 "Aligned load, but unsupported type.\n");
ebfd146a
IR
6335 return false;
6336 }
6337
ebfd146a 6338 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6339 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6340 {
0d0293ac 6341 grouped_load = true;
ebfd146a 6342 /* FORNOW */
3bab6342 6343 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6344
e14c1050 6345 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
d3465d72
RS
6346 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6347
fce57248 6348 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
d3465d72
RS
6349 {
6350 if (vect_load_lanes_supported (vectype, group_size))
6351 load_lanes_p = true;
6352 else if (!vect_grouped_load_supported (vectype, group_size))
6353 return false;
6354 }
d5f035ea
RB
6355
6356 /* If this is single-element interleaving with an element distance
6357 that leaves unused vector loads around, punt - we would at least create
6358 very sub-optimal code in that case (and blow up memory,
6359 see PR65518). */
6360 if (first_stmt == stmt
72c0f643
RB
6361 && !GROUP_NEXT_ELEMENT (stmt_info))
6362 {
6363 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6364 {
6365 if (dump_enabled_p ())
6366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6367 "single-element interleaving not supported "
6368 "for not adjacent vector loads\n");
6369 return false;
6370 }
6371
6372 /* Single-element interleaving requires peeling for gaps. */
836dbb1a 6373 gcc_assert (GROUP_GAP (stmt_info));
72c0f643
RB
6374 }
6375
6376 /* If there is a gap at the end of the group or the group size cannot
6377 be made a multiple of the vector element count, then we access excess
6378 elements in the last iteration and thus need to peel that off. */
6379 if (loop_vinfo
6380 && ! STMT_VINFO_STRIDED_P (stmt_info)
836dbb1a 6381 && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
d3465d72 6382 || (!slp && !load_lanes_p && vf % group_size != 0)))
d5f035ea
RB
6383 {
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
72c0f643
RB
6386 "Data access with gaps requires scalar "
6387 "epilogue loop\n");
6388 if (loop->inner)
6389 {
6390 if (dump_enabled_p ())
6391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6392 "Peeling for outer loop is not supported\n");
6393 return false;
6394 }
6395
6396 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
d5f035ea
RB
6397 }
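	 /* A concrete case: group_size = 3 with nunits = 4 and vf = 4 gives
	    vf % group_size != 0, so the vector loads for the last iteration
	    would read elements past the accessed data; that iteration is
	    therefore peeled off into a scalar epilogue loop.  */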
6398
b1af7da6
RB
6399 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6400 slp_perm = true;
6401
47d3fdb2
RB
6402 /* ??? The following is overly pessimistic (as well as the loop
6403 case above) in the case we can statically determine the excess
6404 elements loaded are within the bounds of a decl that is accessed.
 6405 Likewise, for BB vectorization, using masked loads is a possibility. */
6406 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6407 {
6408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6409 "BB vectorization with gaps at the end of a load "
6410 "is not supported\n");
6411 return false;
6412 }
6413
6414 /* Invalidate assumptions made by dependence analysis when vectorization
6415 on the unrolled body effectively re-orders stmts. */
6416 if (!PURE_SLP_STMT (stmt_info)
6417 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6418 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6419 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6420 {
6421 if (dump_enabled_p ())
6422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6423 "cannot perform implicit CSE when performing "
6424 "group loads with negative dependence distance\n");
6425 return false;
6426 }
6427
 6428 /* Similarly, when the stmt is a load that is part of both an SLP
 6429 instance and a loop vectorized stmt via the same-dr mechanism,
 6430 we have to give up. */
6431 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6432 && (STMT_SLP_TYPE (stmt_info)
6433 != STMT_SLP_TYPE (vinfo_for_stmt
6434 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6435 {
6436 if (dump_enabled_p ())
6437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6438 "conflicting SLP types for CSEd load\n");
6439 return false;
6440 }
6441 }
6442
a1e53f3f 6443
3bab6342 6444 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
aec7ae7d 6445 {
355fe088 6446 gimple *def_stmt;
6447 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6448 &gather_off, &gather_scale);
aec7ae7d 6449 gcc_assert (gather_decl);
6450 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6451 &gather_off_vectype))
aec7ae7d 6452 {
73fbfcad 6453 if (dump_enabled_p ())
78c60e3d 6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6455 "gather index use not simple.\n");
6456 return false;
6457 }
6458 }
f2e2a985 6459 else if (STMT_VINFO_STRIDED_P (stmt_info))
e09b4c37 6460 ;
6461 else
6462 {
6463 negative = tree_int_cst_compare (nested_in_vect_loop
6464 ? STMT_VINFO_DR_STEP (stmt_info)
6465 : DR_STEP (dr),
6466 size_zero_node) < 0;
6467 if (negative && ncopies > 1)
6468 {
73fbfcad 6469 if (dump_enabled_p ())
78c60e3d 6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6471 "multiple types with negative step.\n");
6472 return false;
6473 }
6474
6475 if (negative)
6476 {
6477 if (grouped_load)
6478 {
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6481 "negative step for group load not supported"
6482 "\n");
6483 return false;
6484 }
6485 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6486 if (alignment_support_scheme != dr_aligned
6487 && alignment_support_scheme != dr_unaligned_supported)
6488 {
73fbfcad 6489 if (dump_enabled_p ())
78c60e3d 6490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6491 "negative step but alignment required.\n");
6492 return false;
6493 }
6494 if (!perm_mask_for_reverse (vectype))
6495 {
73fbfcad 6496 if (dump_enabled_p ())
78c60e3d 6497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6498 "negative step and reversing not supported."
6499 "\n");
6500 return false;
6501 }
6502 }
7d75abc8 6503 }
aec7ae7d 6504
6505 if (!vec_stmt) /* transformation not required. */
6506 {
6507 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6508 /* The SLP costs are calculated during SLP analysis. */
6509 if (!PURE_SLP_STMT (stmt_info))
6510 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6511 NULL, NULL, NULL);
6512 return true;
6513 }
6514
73fbfcad 6515 if (dump_enabled_p ())
78c60e3d 6516 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6517 "transform load. ncopies = %d\n", ncopies);
6518
6519 /** Transform. **/
6520
6521 ensure_base_align (stmt_info, dr);
6522
3bab6342 6523 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6524 {
6525 tree vec_oprnd0 = NULL_TREE, op;
6526 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6527 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
d3c2fee0 6528 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6529 edge pe = loop_preheader_edge (loop);
6530 gimple_seq seq;
6531 basic_block new_bb;
6532 enum { NARROW, NONE, WIDEN } modifier;
6533 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6534
6535 if (nunits == gather_off_nunits)
6536 modifier = NONE;
6537 else if (nunits == gather_off_nunits / 2)
6538 {
6539 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6540 modifier = WIDEN;
6541
6542 for (i = 0; i < gather_off_nunits; ++i)
6543 sel[i] = i | nunits;
6544
557be5a8 6545 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6546 }
6547 else if (nunits == gather_off_nunits * 2)
6548 {
6549 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6550 modifier = NARROW;
6551
6552 for (i = 0; i < nunits; ++i)
6553 sel[i] = i < gather_off_nunits
6554 ? i : i + nunits - gather_off_nunits;
6555
557be5a8 6556 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6557 ncopies *= 2;
6558 }
6559 else
6560 gcc_unreachable ();
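 /* A worked illustration of the two masks built above (values follow
    directly from the code): with nunits == 4 and gather_off_nunits == 8
    (WIDEN), sel is { 4, 5, 6, 7, 4, 5, 6, 7 }, so every second copy
    permutes the high half of the offset vector into place; with
    nunits == 8 and gather_off_nunits == 4 (NARROW), sel is
    { 0, 1, 2, 3, 8, 9, 10, 11 }, which concatenates the low halves of
    two successive gather results. */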
6561
6562 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6563 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6564 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6565 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6566 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6567 scaletype = TREE_VALUE (arglist);
d3c2fee0 6568 gcc_checking_assert (types_compatible_p (srctype, rettype));
6569
6570 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6571
6572 ptr = fold_convert (ptrtype, gather_base);
6573 if (!is_gimple_min_invariant (ptr))
6574 {
6575 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6576 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6577 gcc_assert (!new_bb);
6578 }
6579
6580 /* Currently we support only unconditional gather loads,
6581 so mask should be all ones. */
6582 if (TREE_CODE (masktype) == INTEGER_TYPE)
6583 mask = build_int_cst (masktype, -1);
6584 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6585 {
6586 mask = build_int_cst (TREE_TYPE (masktype), -1);
6587 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6588 mask = vect_init_vector (stmt, mask, masktype, NULL);
d3c2fee0 6589 }
6590 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6591 {
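 /* Build a scalar whose target bit pattern is all ones, reinterpreted
    in the floating-point element type of the mask, and splat it across
    the mask vector; an all-ones mask makes every lane of the gather
    load unconditionally, matching the comment above. */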
6592 REAL_VALUE_TYPE r;
6593 long tmp[6];
6594 for (j = 0; j < 6; ++j)
6595 tmp[j] = -1;
6596 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6597 mask = build_real (TREE_TYPE (masktype), r);
d3c2fee0 6598 mask = build_vector_from_val (masktype, mask);
03b9e8e4 6599 mask = vect_init_vector (stmt, mask, masktype, NULL);
6600 }
6601 else
6602 gcc_unreachable ();
6603
6604 scale = build_int_cst (scaletype, gather_scale);
6605
6606 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6607 merge = build_int_cst (TREE_TYPE (rettype), 0);
6608 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6609 {
6610 REAL_VALUE_TYPE r;
6611 long tmp[6];
6612 for (j = 0; j < 6; ++j)
6613 tmp[j] = 0;
6614 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6615 merge = build_real (TREE_TYPE (rettype), r);
6616 }
6617 else
6618 gcc_unreachable ();
6619 merge = build_vector_from_val (rettype, merge);
6620 merge = vect_init_vector (stmt, merge, rettype, NULL);
6621
6622 prev_stmt_info = NULL;
6623 for (j = 0; j < ncopies; ++j)
6624 {
6625 if (modifier == WIDEN && (j & 1))
6626 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6627 perm_mask, stmt, gsi);
6628 else if (j == 0)
6629 op = vec_oprnd0
81c40241 6630 = vect_get_vec_def_for_operand (gather_off, stmt);
6631 else
6632 op = vec_oprnd0
6633 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6634
6635 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6636 {
6637 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6638 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 6639 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6640 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6641 new_stmt
0d0e4a03 6642 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6643 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6644 op = var;
6645 }
6646
6647 new_stmt
d3c2fee0 6648 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6649
6650 if (!useless_type_conversion_p (vectype, rettype))
6651 {
6652 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6653 == TYPE_VECTOR_SUBPARTS (rettype));
0e22bb5a 6654 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6655 gimple_call_set_lhs (new_stmt, op);
6656 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b731b390 6657 var = make_ssa_name (vec_dest);
6658 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6659 new_stmt
0d0e4a03 6660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6661 }
6662 else
6663 {
6664 var = make_ssa_name (vec_dest, new_stmt);
6665 gimple_call_set_lhs (new_stmt, var);
6666 }
6667
6668 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6669
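 /* With the NARROW modifier two successive gather results each provide
    only half of a vector of VECTYPE: even-numbered copies are stashed
    in PREV_RES, and each odd copy emits the permute below that
    concatenates the pair. */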
6670 if (modifier == NARROW)
6671 {
6672 if ((j & 1) == 0)
6673 {
6674 prev_res = var;
6675 continue;
6676 }
6677 var = permute_vec_elements (prev_res, var,
6678 perm_mask, stmt, gsi);
6679 new_stmt = SSA_NAME_DEF_STMT (var);
6680 }
6681
6682 if (prev_stmt_info == NULL)
6683 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6684 else
6685 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6686 prev_stmt_info = vinfo_for_stmt (new_stmt);
6687 }
6688 return true;
6689 }
f2e2a985 6690 else if (STMT_VINFO_STRIDED_P (stmt_info))
6691 {
6692 gimple_stmt_iterator incr_gsi;
6693 bool insert_after;
355fe088 6694 gimple *incr;
7d75abc8 6695 tree offvar;
6696 tree ivstep;
6697 tree running_off;
9771b263 6698 vec<constructor_elt, va_gc> *v = NULL;
7d75abc8 6699 gimple_seq stmts = NULL;
6700 tree stride_base, stride_step, alias_off;
6701
6702 gcc_assert (!nested_in_vect_loop);
7d75abc8 6703
f502d50e 6704 if (slp && grouped_load)
6705 first_dr = STMT_VINFO_DATA_REF
6706 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6707 else
6708 first_dr = dr;
6709
6710 stride_base
6711 = fold_build_pointer_plus
ab313a8c 6712 (DR_BASE_ADDRESS (first_dr),
14ac6aa2 6713 size_binop (PLUS_EXPR,
6714 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6715 convert_to_ptrofftype (DR_INIT (first_dr))));
6716 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6717
6718 /* For a load with loop-invariant (but other than power-of-2)
6719 stride (i.e. not a grouped access) like so:
6720
6721 for (i = 0; i < n; i += stride)
6722 ... = array[i];
6723
6724 we generate a new induction variable and new accesses to
6725 form a new vector (or vectors, depending on ncopies):
6726
6727 for (j = 0; ; j += VF*stride)
6728 tmp1 = array[j];
6729 tmp2 = array[j + stride];
6730 ...
6731 vectemp = {tmp1, tmp2, ...}
6732 */
6733
6734 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6735 build_int_cst (TREE_TYPE (stride_step), vf));
6736
6737 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6738
ab313a8c 6739 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6740 loop, &incr_gsi, insert_after,
6741 &offvar, NULL);
6742 incr = gsi_stmt (incr_gsi);
310213d4 6743 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7d75abc8 6744
6745 stride_step = force_gimple_operand (unshare_expr (stride_step),
6746 &stmts, true, NULL_TREE);
6747 if (stmts)
6748 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6749
6750 prev_stmt_info = NULL;
6751 running_off = offvar;
ab313a8c 6752 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
7b5fc413 6753 int nloads = nunits;
e09b4c37 6754 int lnel = 1;
7b5fc413 6755 tree ltype = TREE_TYPE (vectype);
b266b968 6756 auto_vec<tree> dr_chain;
6757 if (slp)
6758 {
6759 if (group_size < nunits
6760 && nunits % group_size == 0)
6761 {
6762 nloads = nunits / group_size;
6763 lnel = group_size;
6764 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6765 ltype = build_aligned_type (ltype,
6766 TYPE_ALIGN (TREE_TYPE (vectype)));
6767 }
6768 else if (group_size >= nunits
6769 && group_size % nunits == 0)
6770 {
6771 nloads = 1;
6772 lnel = nunits;
6773 ltype = vectype;
6774 ltype = build_aligned_type (ltype,
6775 TYPE_ALIGN (TREE_TYPE (vectype)));
6776 }
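 /* A worked example of the choices above: with V4SI (nunits == 4) and
    group_size == 2 we get nloads == 2 and lnel == 2, i.e. two 2-element
    loads per vector; with group_size == 8 a single full-vector load
    covers half of the group per iteration. */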
6777 /* For SLP permutation support we need to load the whole group,
6778 not only the number of vector stmts the permutation result
6779 fits in. */
b266b968 6780 if (slp_perm)
66c16fd9
RB
6781 {
6782 ncopies = (group_size * vf + nunits - 1) / nunits;
6783 dr_chain.create (ncopies);
6784 }
6785 else
6786 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7b5fc413 6787 }
6788 int group_el = 0;
6789 unsigned HOST_WIDE_INT
6790 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6791 for (j = 0; j < ncopies; j++)
6792 {
7b5fc413 6793 if (nloads > 1)
6794 vec_alloc (v, nloads);
6795 for (i = 0; i < nloads; i++)
7b5fc413 6796 {
6797 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6798 group_el * elsz);
6799 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6800 build2 (MEM_REF, ltype,
6801 running_off, this_off));
6802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6803 if (nloads > 1)
6804 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
6805 gimple_assign_lhs (new_stmt));
6806
6807 group_el += lnel;
6808 if (! slp
6809 || group_el == group_size)
7b5fc413 6810 {
6811 tree newoff = copy_ssa_name (running_off);
6812 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6813 running_off, stride_step);
6814 vect_finish_stmt_generation (stmt, incr, gsi);
6815
6816 running_off = newoff;
e09b4c37 6817 group_el = 0;
7b5fc413 6818 }
7b5fc413 6819 }
e09b4c37 6820 if (nloads > 1)
7d75abc8 6821 {
6822 tree vec_inv = build_constructor (vectype, v);
6823 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6824 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6825 }
6826
7b5fc413 6827 if (slp)
b266b968 6828 {
6829 if (slp_perm)
6830 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6831 else
6832 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b266b968 6833 }
7d75abc8 6834 else
6835 {
6836 if (j == 0)
6837 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6838 else
6839 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6840 prev_stmt_info = vinfo_for_stmt (new_stmt);
6841 }
7d75abc8 6842 }
6843 if (slp_perm)
6844 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6845 slp_node_instance, false);
6846 return true;
6847 }
aec7ae7d 6848
0d0293ac 6849 if (grouped_load)
ebfd146a 6850 {
e14c1050 6851 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4f0a0218 6852 /* For SLP vectorization we directly vectorize a subchain
6853 without permutation. */
6854 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6855 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6856 /* For BB vectorization always use the first stmt to base
6857 the data ref pointer on. */
6858 if (bb_vinfo)
6859 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6aa904c4 6860
ebfd146a 6861 /* Check if the chain of loads is already vectorized. */
6862 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6863 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6864 ??? But we can only do so if there is exactly one
6865 as we have no way to get at the rest. Leave the CSE
6866 opportunity alone.
6867 ??? With the group load eventually participating
6868 in multiple different permutations (having multiple
6869 slp nodes which refer to the same group) the CSE
6870 is even wrong code. See PR56270. */
6871 && !slp)
6872 {
6873 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6874 return true;
6875 }
6876 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 6877 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
9b999e8c 6878 group_gap_adj = 0;
6879
6880 /* VEC_NUM is the number of vect stmts to be created for this group. */
6881 if (slp)
6882 {
0d0293ac 6883 grouped_load = false;
6884 /* For SLP permutation support we need to load the whole group,
6885 not only the number of vector stmts the permutation result
6886 fits in. */
6887 if (slp_perm)
6888 vec_num = (group_size * vf + nunits - 1) / nunits;
6889 else
6890 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
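 /* A worked example of the adjustment below: with group_size == 3,
    nunits == 4 and vf == 2, a permuted SLP load needs vec_num == 2
    vectors covering 8 elements for only 6 scalars, so group_gap_adj
    becomes -2 and the pointer is later bumped back over the excess
    elements. */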
9b999e8c 6891 group_gap_adj = vf * group_size - nunits * vec_num;
a70d6342 6892 }
ebfd146a 6893 else
9b999e8c 6894 vec_num = group_size;
6895 }
6896 else
6897 {
6898 first_stmt = stmt;
6899 first_dr = dr;
6900 group_size = vec_num = 1;
9b999e8c 6901 group_gap_adj = 0;
ebfd146a
IR
6902 }
6903
720f5239 6904 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 6905 gcc_assert (alignment_support_scheme);
6906 /* Targets with load-lane instructions must not require explicit
6907 realignment. */
6908 gcc_assert (!load_lanes_p
6909 || alignment_support_scheme == dr_aligned
6910 || alignment_support_scheme == dr_unaligned_supported);
6911
6912 /* In case the vectorization factor (VF) is bigger than the number
6913 of elements that we can fit in a vectype (nunits), we have to generate
6914 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 6915 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 6916 from one copy of the vector stmt to the next, in the field
ff802fa1 6917 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 6918 stages to find the correct vector defs to be used when vectorizing
6919 stmts that use the defs of the current stmt. The example below
6920 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6921 need to create 4 vectorized stmts):
6922
6923 before vectorization:
6924 RELATED_STMT VEC_STMT
6925 S1: x = memref - -
6926 S2: z = x + 1 - -
6927
6928 step 1: vectorize stmt S1:
6929 We first create the vector stmt VS1_0, and, as usual, record a
6930 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6931 Next, we create the vector stmt VS1_1, and record a pointer to
6932 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
ff802fa1 6933 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6934 stmts and pointers:
6935 RELATED_STMT VEC_STMT
6936 VS1_0: vx0 = memref0 VS1_1 -
6937 VS1_1: vx1 = memref1 VS1_2 -
6938 VS1_2: vx2 = memref2 VS1_3 -
6939 VS1_3: vx3 = memref3 - -
6940 S1: x = load - VS1_0
6941 S2: z = x + 1 - -
6942
6943 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6944 information we recorded in RELATED_STMT field is used to vectorize
6945 stmt S2. */
6946
0d0293ac 6947 /* In case of interleaving (non-unit grouped access):
6948
6949 S1: x2 = &base + 2
6950 S2: x0 = &base
6951 S3: x1 = &base + 1
6952 S4: x3 = &base + 3
6953
b8698a0f 6954 Vectorized loads are created in the order of memory accesses
6955 starting from the access of the first stmt of the chain:
6956
6957 VS1: vx0 = &base
6958 VS2: vx1 = &base + vec_size*1
6959 VS3: vx3 = &base + vec_size*2
6960 VS4: vx4 = &base + vec_size*3
6961
6962 Then permutation statements are generated:
6963
6964 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6965 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6966 ...
6967
6968 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6969 (the order of the data-refs in the output of vect_permute_load_chain
6970 corresponds to the order of scalar stmts in the interleaving chain - see
6971 the documentation of vect_permute_load_chain()).
6972 The generation of permutation stmts and recording them in
0d0293ac 6973 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
ebfd146a 6974
b8698a0f 6975 In case of both multiple types and interleaving, the vector loads and
6976 permutation stmts above are created for every copy. The result vector
6977 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6978 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6979
6980 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6981 on a target that supports unaligned accesses (dr_unaligned_supported)
6982 we generate the following code:
6983 p = initial_addr;
6984 indx = 0;
6985 loop {
6986 p = p + indx * vectype_size;
6987 vec_dest = *(p);
6988 indx = indx + 1;
6989 }
6990
6991 Otherwise, the data reference is potentially unaligned on a target that
b8698a0f 6992 does not support unaligned accesses (dr_explicit_realign_optimized) -
6993 then generate the following code, in which the data in each iteration is
6994 obtained by two vector loads, one from the previous iteration, and one
6995 from the current iteration:
6996 p1 = initial_addr;
6997 msq_init = *(floor(p1))
6998 p2 = initial_addr + VS - 1;
6999 realignment_token = call target_builtin;
7000 indx = 0;
7001 loop {
7002 p2 = p2 + indx * vectype_size
7003 lsq = *(floor(p2))
7004 vec_dest = realign_load (msq, lsq, realignment_token)
7005 indx = indx + 1;
7006 msq = lsq;
7007 } */
7008
7009 /* If the misalignment remains the same throughout the execution of the
7010 loop, we can create the init_addr and permutation mask at the loop
ff802fa1 7011 preheader. Otherwise, it needs to be created inside the loop.
7012 This can only occur when vectorizing memory accesses in the inner-loop
7013 nested within an outer-loop that is being vectorized. */
7014
d1e4b493 7015 if (nested_in_vect_loop
211bea38 7016 && (TREE_INT_CST_LOW (DR_STEP (dr))
7017 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7018 {
7019 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7020 compute_in_loop = true;
7021 }
7022
7023 if ((alignment_support_scheme == dr_explicit_realign_optimized
7024 || alignment_support_scheme == dr_explicit_realign)
59fd17e3 7025 && !compute_in_loop)
7026 {
7027 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7028 alignment_support_scheme, NULL_TREE,
7029 &at_loop);
7030 if (alignment_support_scheme == dr_explicit_realign_optimized)
7031 {
538dd0b7 7032 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7033 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7034 size_one_node);
7035 }
7036 }
7037 else
7038 at_loop = loop;
7039
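 /* For a reverse (negative step) access the vector load has to start
    TYPE_VECTOR_SUBPARTS - 1 elements before the current scalar element,
    so that the reverse permute applied after each load can restore the
    required element order. */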
7040 if (negative)
7041 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7042
7043 if (load_lanes_p)
7044 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7045 else
7046 aggr_type = vectype;
7047
7048 prev_stmt_info = NULL;
7049 for (j = 0; j < ncopies; j++)
b8698a0f 7050 {
272c6793 7051 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7052 if (j == 0)
7053 {
7054 bool simd_lane_access_p
7055 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7056 if (simd_lane_access_p
7057 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7058 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7059 && integer_zerop (DR_OFFSET (first_dr))
7060 && integer_zerop (DR_INIT (first_dr))
7061 && alias_sets_conflict_p (get_alias_set (aggr_type),
7062 get_alias_set (DR_REF (first_dr)))
7063 && (alignment_support_scheme == dr_aligned
7064 || alignment_support_scheme == dr_unaligned_supported))
7065 {
7066 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7067 dataref_offset = build_int_cst (reference_alias_ptr_type
7068 (DR_REF (first_dr)), 0);
8928eff3 7069 inv_p = false;
74bf76ed 7070 }
7071 else if (first_stmt_for_drptr
7072 && first_stmt != first_stmt_for_drptr)
7073 {
7074 dataref_ptr
7075 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7076 at_loop, offset, &dummy, gsi,
7077 &ptr_incr, simd_lane_access_p,
7078 &inv_p, byte_offset);
7079 /* Adjust the pointer by the difference to first_stmt. */
7080 data_reference_p ptrdr
7081 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7082 tree diff = fold_convert (sizetype,
7083 size_binop (MINUS_EXPR,
7084 DR_INIT (first_dr),
7085 DR_INIT (ptrdr)));
7086 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7087 stmt, diff);
7088 }
7089 else
7090 dataref_ptr
7091 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7092 offset, &dummy, gsi, &ptr_incr,
7093 simd_lane_access_p, &inv_p,
7094 byte_offset);
7095 }
7096 else if (dataref_offset)
7097 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7098 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7099 else
7100 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7101 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7102
0d0293ac 7103 if (grouped_load || slp_perm)
9771b263 7104 dr_chain.create (vec_num);
5ce1ee7f 7105
272c6793 7106 if (load_lanes_p)
ebfd146a 7107 {
7108 tree vec_array;
7109
7110 vec_array = create_vector_array (vectype, vec_num);
7111
7112 /* Emit:
7113 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7114 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7115 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7116 gimple_call_set_lhs (new_stmt, vec_array);
7117 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7118
7119 /* Extract each vector into an SSA_NAME. */
7120 for (i = 0; i < vec_num; i++)
ebfd146a 7121 {
7122 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7123 vec_array, i);
9771b263 7124 dr_chain.quick_push (new_temp);
7125 }
7126
7127 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7128 vect_record_grouped_load_vectors (stmt, dr_chain);
7129 }
7130 else
7131 {
7132 for (i = 0; i < vec_num; i++)
7133 {
7134 if (i > 0)
7135 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7136 stmt, NULL_TREE);
7137
7138 /* 2. Create the vector-load in the loop. */
7139 switch (alignment_support_scheme)
7140 {
7141 case dr_aligned:
7142 case dr_unaligned_supported:
be1ac4ec 7143 {
7144 unsigned int align, misalign;
7145
272c6793 7146 data_ref
7147 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7148 dataref_offset
7149 ? dataref_offset
7150 : build_int_cst (reference_alias_ptr_type
7151 (DR_REF (first_dr)), 0));
644ffefd 7152 align = TYPE_ALIGN_UNIT (vectype);
7153 if (alignment_support_scheme == dr_aligned)
7154 {
7155 gcc_assert (aligned_access_p (first_dr));
644ffefd 7156 misalign = 0;
7157 }
7158 else if (DR_MISALIGNMENT (first_dr) == -1)
7159 {
7160 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7161 align = TYPE_ALIGN_UNIT (elem_type);
7162 else
7163 align = (get_object_alignment (DR_REF (first_dr))
7164 / BITS_PER_UNIT);
7165 misalign = 0;
7166 TREE_TYPE (data_ref)
7167 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 7168 align * BITS_PER_UNIT);
7169 }
7170 else
7171 {
7172 TREE_TYPE (data_ref)
7173 = build_aligned_type (TREE_TYPE (data_ref),
7174 TYPE_ALIGN (elem_type));
644ffefd 7175 misalign = DR_MISALIGNMENT (first_dr);
272c6793 7176 }
7177 if (dataref_offset == NULL_TREE
7178 && TREE_CODE (dataref_ptr) == SSA_NAME)
7179 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7180 align, misalign);
272c6793 7181 break;
be1ac4ec 7182 }
272c6793 7183 case dr_explicit_realign:
267d3070 7184 {
272c6793 7185 tree ptr, bump;
272c6793 7186
d88981fc 7187 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7188
7189 if (compute_in_loop)
7190 msq = vect_setup_realignment (first_stmt, gsi,
7191 &realignment_token,
7192 dr_explicit_realign,
7193 dataref_ptr, NULL);
7194
7195 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7196 ptr = copy_ssa_name (dataref_ptr);
7197 else
7198 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7199 new_stmt = gimple_build_assign
7200 (ptr, BIT_AND_EXPR, dataref_ptr,
7201 build_int_cst
7202 (TREE_TYPE (dataref_ptr),
7203 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7205 data_ref
7206 = build2 (MEM_REF, vectype, ptr,
7207 build_int_cst (reference_alias_ptr_type
7208 (DR_REF (first_dr)), 0));
7209 vec_dest = vect_create_destination_var (scalar_dest,
7210 vectype);
7211 new_stmt = gimple_build_assign (vec_dest, data_ref);
7212 new_temp = make_ssa_name (vec_dest, new_stmt);
7213 gimple_assign_set_lhs (new_stmt, new_temp);
7214 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7215 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7217 msq = new_temp;
7218
d88981fc 7219 bump = size_binop (MULT_EXPR, vs,
7b7b1813 7220 TYPE_SIZE_UNIT (elem_type));
d88981fc 7221 bump = size_binop (MINUS_EXPR, bump, size_one_node);
272c6793 7222 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7223 new_stmt = gimple_build_assign
7224 (NULL_TREE, BIT_AND_EXPR, ptr,
7225 build_int_cst
7226 (TREE_TYPE (ptr),
7227 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
aed93b23 7228 ptr = copy_ssa_name (ptr, new_stmt);
7229 gimple_assign_set_lhs (new_stmt, ptr);
7230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7231 data_ref
7232 = build2 (MEM_REF, vectype, ptr,
7233 build_int_cst (reference_alias_ptr_type
7234 (DR_REF (first_dr)), 0));
7235 break;
267d3070 7236 }
272c6793 7237 case dr_explicit_realign_optimized:
7238 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7239 new_temp = copy_ssa_name (dataref_ptr);
7240 else
7241 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7242 new_stmt = gimple_build_assign
7243 (new_temp, BIT_AND_EXPR, dataref_ptr,
7244 build_int_cst
7245 (TREE_TYPE (dataref_ptr),
7246 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7247 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7248 data_ref
7249 = build2 (MEM_REF, vectype, new_temp,
7250 build_int_cst (reference_alias_ptr_type
7251 (DR_REF (first_dr)), 0));
7252 break;
7253 default:
7254 gcc_unreachable ();
7255 }
ebfd146a 7256 vec_dest = vect_create_destination_var (scalar_dest, vectype);
272c6793 7257 new_stmt = gimple_build_assign (vec_dest, data_ref);
7258 new_temp = make_ssa_name (vec_dest, new_stmt);
7259 gimple_assign_set_lhs (new_stmt, new_temp);
7260 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7261
7262 /* 3. Handle explicit realignment if necessary/supported.
7263 Create in loop:
7264 vec_dest = realign_load (msq, lsq, realignment_token) */
7265 if (alignment_support_scheme == dr_explicit_realign_optimized
7266 || alignment_support_scheme == dr_explicit_realign)
ebfd146a 7267 {
7268 lsq = gimple_assign_lhs (new_stmt);
7269 if (!realignment_token)
7270 realignment_token = dataref_ptr;
7271 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7272 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7273 msq, lsq, realignment_token);
7274 new_temp = make_ssa_name (vec_dest, new_stmt);
7275 gimple_assign_set_lhs (new_stmt, new_temp);
7276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7277
7278 if (alignment_support_scheme == dr_explicit_realign_optimized)
7279 {
7280 gcc_assert (phi);
7281 if (i == vec_num - 1 && j == ncopies - 1)
7282 add_phi_arg (phi, lsq,
7283 loop_latch_edge (containing_loop),
9e227d60 7284 UNKNOWN_LOCATION);
7285 msq = lsq;
7286 }
ebfd146a 7287 }
ebfd146a 7288
7289 /* 4. Handle invariant-load. */
7290 if (inv_p && !bb_vinfo)
7291 {
59fd17e3 7292 gcc_assert (!grouped_load);
7293 /* If we have versioned for aliasing or the loop doesn't
7294 have any data dependencies that would preclude this,
7295 then we are sure this is a loop invariant load and
7296 thus we can insert it on the preheader edge. */
7297 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7298 && !nested_in_vect_loop
6b916b36 7299 && hoist_defs_of_uses (stmt, loop))
7300 {
7301 if (dump_enabled_p ())
7302 {
7303 dump_printf_loc (MSG_NOTE, vect_location,
7304 "hoisting out of the vectorized "
7305 "loop: ");
7306 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a0e35eb0 7307 }
b731b390 7308 tree tem = copy_ssa_name (scalar_dest);
7309 gsi_insert_on_edge_immediate
7310 (loop_preheader_edge (loop),
7311 gimple_build_assign (tem,
7312 unshare_expr
7313 (gimple_assign_rhs1 (stmt))));
7314 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7315 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7316 set_vinfo_for_stmt (new_stmt,
7317 new_stmt_vec_info (new_stmt, vinfo));
7318 }
7319 else
7320 {
7321 gimple_stmt_iterator gsi2 = *gsi;
7322 gsi_next (&gsi2);
7323 new_temp = vect_init_vector (stmt, scalar_dest,
7324 vectype, &gsi2);
34cd48e5 7325 new_stmt = SSA_NAME_DEF_STMT (new_temp);
a0e35eb0 7326 }
7327 }
7328
7329 if (negative)
7330 {
7331 tree perm_mask = perm_mask_for_reverse (vectype);
7332 new_temp = permute_vec_elements (new_temp, new_temp,
7333 perm_mask, stmt, gsi);
7334 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7335 }
267d3070 7336
272c6793 7337 /* Collect vector loads and later create their permutation in
7338 vect_transform_grouped_load (). */
7339 if (grouped_load || slp_perm)
9771b263 7340 dr_chain.quick_push (new_temp);
267d3070 7341
7342 /* Store vector loads in the corresponding SLP_NODE. */
7343 if (slp && !slp_perm)
9771b263 7344 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
272c6793 7345 }
7346 /* Bump the vector pointer to account for a gap or for excess
7347 elements loaded for a permuted SLP load. */
7348 if (group_gap_adj != 0)
a64b9c26 7349 {
7350 bool ovf;
7351 tree bump
7352 = wide_int_to_tree (sizetype,
7353 wi::smul (TYPE_SIZE_UNIT (elem_type),
7354 group_gap_adj, &ovf));
7355 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7356 stmt, bump);
7357 }
7358 }
7359
7360 if (slp && !slp_perm)
7361 continue;
7362
7363 if (slp_perm)
7364 {
01d8bf07 7365 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7366 slp_node_instance, false))
7367 {
9771b263 7368 dr_chain.release ();
7369 return false;
7370 }
7371 }
7372 else
7373 {
0d0293ac 7374 if (grouped_load)
ebfd146a 7375 {
272c6793 7376 if (!load_lanes_p)
0d0293ac 7377 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
ebfd146a 7378 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7379 }
7380 else
7381 {
7382 if (j == 0)
7383 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7384 else
7385 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7386 prev_stmt_info = vinfo_for_stmt (new_stmt);
7387 }
7388 }
9771b263 7389 dr_chain.release ();
7390 }
7391
7392 return true;
7393}
7394
7395/* Function vect_is_simple_cond.
b8698a0f 7396
7397 Input:
7398 LOOP - the loop that is being vectorized.
7399 COND - Condition that is checked for simple use.
7400
7401 Output:
7402 *COMP_VECTYPE - the vector type for the comparison.
7403
7404 Returns whether a COND can be vectorized. Checks whether
 7405 condition operands are supportable using vect_is_simple_use. */
7406
87aab9b2 7407static bool
81c40241 7408vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7409{
7410 tree lhs, rhs;
ebfd146a 7411 enum vect_def_type dt;
e9e1d143 7412 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
ebfd146a 7413
7414 /* Mask case. */
7415 if (TREE_CODE (cond) == SSA_NAME
7416 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7417 {
7418 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7419 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7420 &dt, comp_vectype)
7421 || !*comp_vectype
7422 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7423 return false;
7424 return true;
7425 }
7426
7427 if (!COMPARISON_CLASS_P (cond))
7428 return false;
7429
7430 lhs = TREE_OPERAND (cond, 0);
7431 rhs = TREE_OPERAND (cond, 1);
7432
7433 if (TREE_CODE (lhs) == SSA_NAME)
7434 {
355fe088 7435 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
81c40241 7436 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7437 return false;
7438 }
7439 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7440 && TREE_CODE (lhs) != FIXED_CST)
7441 return false;
7442
7443 if (TREE_CODE (rhs) == SSA_NAME)
7444 {
355fe088 7445 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
81c40241 7446 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7447 return false;
7448 }
f7e531cf 7449 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7450 && TREE_CODE (rhs) != FIXED_CST)
7451 return false;
7452
7453 if (vectype1 && vectype2
7454 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7455 return false;
7456
e9e1d143 7457 *comp_vectype = vectype1 ? vectype1 : vectype2;
7458 return true;
7459}
7460
7461/* vectorizable_condition.
7462
 7463 Check if STMT is a conditional modify expression that can be vectorized.
7464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7465 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7466 at GSI.
7467
 7468 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
 7469 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
 0ad23163 7470 the else clause if it is 2).
7471
7472 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7473
4bbe8262 7474bool
7475vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7476 gimple **vec_stmt, tree reduc_def, int reduc_index,
f7e531cf 7477 slp_tree slp_node)
7478{
7479 tree scalar_dest = NULL_TREE;
7480 tree vec_dest = NULL_TREE;
7481 tree cond_expr, then_clause, else_clause;
7482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
df11cc78 7483 tree comp_vectype = NULL_TREE;
7484 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7485 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5958f9e2 7486 tree vec_compare;
7487 tree new_temp;
7488 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a855b1b1 7489 enum vect_def_type dt, dts[4];
f7e531cf 7490 int ncopies;
ebfd146a 7491 enum tree_code code;
a855b1b1 7492 stmt_vec_info prev_stmt_info = NULL;
7493 int i, j;
7494 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7495 vec<tree> vec_oprnds0 = vNULL;
7496 vec<tree> vec_oprnds1 = vNULL;
7497 vec<tree> vec_oprnds2 = vNULL;
7498 vec<tree> vec_oprnds3 = vNULL;
74946978 7499 tree vec_cmp_type;
a414c77f 7500 bool masked = false;
b8698a0f 7501
7502 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7503 return false;
7504
7505 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7506 {
7507 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7508 return false;
ebfd146a 7509
7510 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7511 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7512 && reduc_def))
7513 return false;
ebfd146a 7514
7515 /* FORNOW: not yet supported. */
7516 if (STMT_VINFO_LIVE_P (stmt_info))
7517 {
7518 if (dump_enabled_p ())
7519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7520 "value used after loop.\n");
7521 return false;
7522 }
7523 }
7524
7525 /* Is vectorizable conditional operation? */
7526 if (!is_gimple_assign (stmt))
7527 return false;
7528
7529 code = gimple_assign_rhs_code (stmt);
7530
7531 if (code != COND_EXPR)
7532 return false;
7533
7534 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7535 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2947d3b2 7536 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
465c8c19 7537
fce57248 7538 if (slp_node)
7539 ncopies = 1;
7540 else
7541 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7542
7543 gcc_assert (ncopies >= 1);
7544 if (reduc_index && ncopies > 1)
7545 return false; /* FORNOW */
7546
7547 cond_expr = gimple_assign_rhs1 (stmt);
7548 then_clause = gimple_assign_rhs2 (stmt);
7549 else_clause = gimple_assign_rhs3 (stmt);
ebfd146a 7550
81c40241 7551 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
e9e1d143 7552 || !comp_vectype)
7553 return false;
7554
81c40241 7555 gimple *def_stmt;
7556 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7557 &vectype1))
7558 return false;
7559 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7560 &vectype2))
ebfd146a 7561 return false;
7562
7563 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7564 return false;
7565
7566 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7567 return false;
7568
7569 masked = !COMPARISON_CLASS_P (cond_expr);
7570 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7571
7572 if (vec_cmp_type == NULL_TREE)
7573 return false;
784fb9b3 7574
b8698a0f 7575 if (!vec_stmt)
7576 {
7577 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
e9e1d143 7578 return expand_vec_cond_expr_p (vectype, comp_vectype);
ebfd146a
IR
7579 }
7580
f7e531cf
IR
7581 /* Transform. */
7582
7583 if (!slp_node)
7584 {
7585 vec_oprnds0.create (1);
7586 vec_oprnds1.create (1);
7587 vec_oprnds2.create (1);
7588 vec_oprnds3.create (1);
f7e531cf 7589 }
7590
7591 /* Handle def. */
7592 scalar_dest = gimple_assign_lhs (stmt);
7593 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7594
7595 /* Handle cond expr. */
7596 for (j = 0; j < ncopies; j++)
7597 {
538dd0b7 7598 gassign *new_stmt = NULL;
7599 if (j == 0)
7600 {
7601 if (slp_node)
7602 {
7603 auto_vec<tree, 4> ops;
7604 auto_vec<vec<tree>, 4> vec_defs;
9771b263 7605
7606 if (masked)
7607 ops.safe_push (cond_expr);
7608 else
7609 {
7610 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7611 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7612 }
7613 ops.safe_push (then_clause);
7614 ops.safe_push (else_clause);
f7e531cf 7615 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7616 vec_oprnds3 = vec_defs.pop ();
7617 vec_oprnds2 = vec_defs.pop ();
7618 if (!masked)
7619 vec_oprnds1 = vec_defs.pop ();
37b5ec8f 7620 vec_oprnds0 = vec_defs.pop ();
f7e531cf 7621
7622 ops.release ();
7623 vec_defs.release ();
7624 }
7625 else
7626 {
355fe088 7627 gimple *gtemp;
7628 if (masked)
7629 {
7630 vec_cond_lhs
7631 = vect_get_vec_def_for_operand (cond_expr, stmt,
7632 comp_vectype);
7633 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7634 &gtemp, &dts[0]);
7635 }
7636 else
7637 {
7638 vec_cond_lhs =
7639 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7640 stmt, comp_vectype);
7641 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7642 loop_vinfo, &gtemp, &dts[0]);
7643
7644 vec_cond_rhs =
7645 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7646 stmt, comp_vectype);
7647 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7648 loop_vinfo, &gtemp, &dts[1]);
7649 }
7650 if (reduc_index == 1)
7651 vec_then_clause = reduc_def;
7652 else
7653 {
7654 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7655 stmt);
7656 vect_is_simple_use (then_clause, loop_vinfo,
7657 &gtemp, &dts[2]);
7658 }
7659 if (reduc_index == 2)
7660 vec_else_clause = reduc_def;
7661 else
7662 {
7663 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7664 stmt);
7665 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
f7e531cf 7666 }
7667 }
7668 }
7669 else
7670 {
7671 vec_cond_lhs
7672 = vect_get_vec_def_for_stmt_copy (dts[0],
7673 vec_oprnds0.pop ());
7674 if (!masked)
7675 vec_cond_rhs
7676 = vect_get_vec_def_for_stmt_copy (dts[1],
7677 vec_oprnds1.pop ());
7678
a855b1b1 7679 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
9771b263 7680 vec_oprnds2.pop ());
a855b1b1 7681 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
9771b263 7682 vec_oprnds3.pop ());
7683 }
7684
7685 if (!slp_node)
7686 {
9771b263 7687 vec_oprnds0.quick_push (vec_cond_lhs);
7688 if (!masked)
7689 vec_oprnds1.quick_push (vec_cond_rhs);
7690 vec_oprnds2.quick_push (vec_then_clause);
7691 vec_oprnds3.quick_push (vec_else_clause);
7692 }
7693
9dc3f7de 7694 /* Arguments are ready. Create the new vector stmt. */
9771b263 7695 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f7e531cf 7696 {
7697 vec_then_clause = vec_oprnds2[i];
7698 vec_else_clause = vec_oprnds3[i];
a855b1b1 7699
7700 if (masked)
7701 vec_compare = vec_cond_lhs;
7702 else
7703 {
7704 vec_cond_rhs = vec_oprnds1[i];
7705 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7706 vec_cond_lhs, vec_cond_rhs);
7707 }
7708 new_temp = make_ssa_name (vec_dest);
7709 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7710 vec_compare, vec_then_clause,
7711 vec_else_clause);
7712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7713 if (slp_node)
9771b263 7714 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7715 }
7716
7717 if (slp_node)
7718 continue;
7719
7720 if (j == 0)
7721 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7722 else
7723 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7724
7725 prev_stmt_info = vinfo_for_stmt (new_stmt);
a855b1b1 7726 }
b8698a0f 7727
7728 vec_oprnds0.release ();
7729 vec_oprnds1.release ();
7730 vec_oprnds2.release ();
7731 vec_oprnds3.release ();
f7e531cf 7732
7733 return true;
7734}
7735
7736/* vectorizable_comparison.
7737
 7738 Check if STMT is a comparison expression that can be vectorized.
7739 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7740 comparison, put it in VEC_STMT, and insert it at GSI.
7741
7742 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7743
fce57248 7744static bool
7745vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7746 gimple **vec_stmt, tree reduc_def,
7747 slp_tree slp_node)
7748{
7749 tree lhs, rhs1, rhs2;
7750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7751 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7752 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7753 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7754 tree new_temp;
7755 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7756 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7757 unsigned nunits;
7758 int ncopies;
7759 enum tree_code code;
7760 stmt_vec_info prev_stmt_info = NULL;
7761 int i, j;
7762 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7763 vec<tree> vec_oprnds0 = vNULL;
7764 vec<tree> vec_oprnds1 = vNULL;
7765 gimple *def_stmt;
7766 tree mask_type;
7767 tree mask;
7768
7769 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7770 return false;
7771
30480bcd 7772 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7773 return false;
7774
7775 mask_type = vectype;
7776 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7777
fce57248 7778 if (slp_node)
7779 ncopies = 1;
7780 else
7781 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7782
7783 gcc_assert (ncopies >= 1);
7784 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7785 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7786 && reduc_def))
7787 return false;
7788
7789 if (STMT_VINFO_LIVE_P (stmt_info))
7790 {
7791 if (dump_enabled_p ())
7792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7793 "value used after loop.\n");
7794 return false;
7795 }
7796
7797 if (!is_gimple_assign (stmt))
7798 return false;
7799
7800 code = gimple_assign_rhs_code (stmt);
7801
7802 if (TREE_CODE_CLASS (code) != tcc_comparison)
7803 return false;
7804
7805 rhs1 = gimple_assign_rhs1 (stmt);
7806 rhs2 = gimple_assign_rhs2 (stmt);
7807
7808 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7809 &dts[0], &vectype1))
7810 return false;
7811
7812 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7813 &dts[1], &vectype2))
7814 return false;
7815
7816 if (vectype1 && vectype2
7817 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7818 return false;
7819
7820 vectype = vectype1 ? vectype1 : vectype2;
7821
7822 /* Invariant comparison. */
7823 if (!vectype)
7824 {
7825 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7826 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7827 return false;
7828 }
7829 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7830 return false;
7831
7832 if (!vec_stmt)
7833 {
7834 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7835 vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
7836 return expand_vec_cmp_expr_p (vectype, mask_type);
7837 }
7838
7839 /* Transform. */
7840 if (!slp_node)
7841 {
7842 vec_oprnds0.create (1);
7843 vec_oprnds1.create (1);
7844 }
7845
7846 /* Handle def. */
7847 lhs = gimple_assign_lhs (stmt);
7848 mask = vect_create_destination_var (lhs, mask_type);
7849
7850 /* Handle cmp expr. */
7851 for (j = 0; j < ncopies; j++)
7852 {
7853 gassign *new_stmt = NULL;
7854 if (j == 0)
7855 {
7856 if (slp_node)
7857 {
7858 auto_vec<tree, 2> ops;
7859 auto_vec<vec<tree>, 2> vec_defs;
7860
7861 ops.safe_push (rhs1);
7862 ops.safe_push (rhs2);
7863 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7864 vec_oprnds1 = vec_defs.pop ();
7865 vec_oprnds0 = vec_defs.pop ();
7866 }
7867 else
7868 {
7869 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7870 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7871 }
7872 }
7873 else
7874 {
7875 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7876 vec_oprnds0.pop ());
7877 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7878 vec_oprnds1.pop ());
7879 }
7880
7881 if (!slp_node)
7882 {
7883 vec_oprnds0.quick_push (vec_rhs1);
7884 vec_oprnds1.quick_push (vec_rhs2);
7885 }
7886
7887 /* Arguments are ready. Create the new vector stmt. */
7888 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7889 {
7890 vec_rhs2 = vec_oprnds1[i];
7891
7892 new_temp = make_ssa_name (mask);
7893 new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
7894 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7895 if (slp_node)
7896 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7897 }
7898
7899 if (slp_node)
7900 continue;
7901
7902 if (j == 0)
7903 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7904 else
7905 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7906
7907 prev_stmt_info = vinfo_for_stmt (new_stmt);
7908 }
7909
7910 vec_oprnds0.release ();
7911 vec_oprnds1.release ();
7912
7913 return true;
7914}
ebfd146a 7915
8644a673 7916/* Make sure the statement is vectorizable. */
7917
7918bool
355fe088 7919vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 7920{
8644a673 7921 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 7922 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 7923 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 7924 bool ok;
a70d6342 7925 tree scalar_type, vectype;
355fe088 7926 gimple *pattern_stmt;
363477c0 7927 gimple_seq pattern_def_seq;
ebfd146a 7928
73fbfcad 7929 if (dump_enabled_p ())
ebfd146a 7930 {
7931 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7932 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 7933 }
ebfd146a 7934
1825a1f3 7935 if (gimple_has_volatile_ops (stmt))
b8698a0f 7936 {
73fbfcad 7937 if (dump_enabled_p ())
78c60e3d 7938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7939 "not vectorized: stmt has volatile operands\n");
7940
7941 return false;
7942 }
7943
7944 /* Skip stmts that do not need to be vectorized. In loops this is expected
7945 to include:
7946 - the COND_EXPR which is the loop exit condition
7947 - any LABEL_EXPRs in the loop
b8698a0f 7948 - computations that are used only for array indexing or loop control.
8644a673 7949 In basic blocks we only analyze statements that are a part of some SLP
83197f37 7950 instance, therefore, all the statements are relevant.
ebfd146a 7951
d092494c 7952 Pattern statement needs to be analyzed instead of the original statement
83197f37 7953 if the original statement is not relevant. Otherwise, we analyze both
 7954 statements. In basic blocks we are called from some SLP instance
 7955 traversal; there we don't analyze pattern stmts instead, as the
 7956 pattern stmts will already be part of an SLP instance. */
7957
7958 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 7959 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 7960 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 7961 {
9d5e7640 7962 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 7963 && pattern_stmt
7964 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7965 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7966 {
83197f37 7967 /* Analyze PATTERN_STMT instead of the original stmt. */
7968 stmt = pattern_stmt;
7969 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 7970 if (dump_enabled_p ())
9d5e7640 7971 {
7972 dump_printf_loc (MSG_NOTE, vect_location,
7973 "==> examining pattern statement: ");
7974 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7975 }
7976 }
7977 else
7978 {
73fbfcad 7979 if (dump_enabled_p ())
e645e942 7980 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 7981
7982 return true;
7983 }
8644a673 7984 }
83197f37 7985 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 7986 && node == NULL
7987 && pattern_stmt
7988 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7989 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7990 {
7991 /* Analyze PATTERN_STMT too. */
73fbfcad 7992 if (dump_enabled_p ())
83197f37 7993 {
7994 dump_printf_loc (MSG_NOTE, vect_location,
7995 "==> examining pattern statement: ");
7996 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7997 }
7998
7999 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8000 return false;
8001 }
ebfd146a 8002
1107f3ae 8003 if (is_pattern_stmt_p (stmt_info)
079c527f 8004 && node == NULL
363477c0 8005 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 8006 {
363477c0 8007 gimple_stmt_iterator si;
1107f3ae 8008
8009 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8010 {
355fe088 8011 gimple *pattern_def_stmt = gsi_stmt (si);
8012 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8013 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8014 {
8015 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8016 if (dump_enabled_p ())
363477c0 8017 {
8018 dump_printf_loc (MSG_NOTE, vect_location,
8019 "==> examining pattern def statement: ");
8020 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8021 }
1107f3ae 8022
8023 if (!vect_analyze_stmt (pattern_def_stmt,
8024 need_to_vectorize, node))
8025 return false;
8026 }
8027 }
8028 }
1107f3ae 8029
8030 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8031 {
8032 case vect_internal_def:
8033 break;
ebfd146a 8034
8644a673 8035 case vect_reduction_def:
7c5222ff 8036 case vect_nested_cycle:
8037 gcc_assert (!bb_vinfo
8038 && (relevance == vect_used_in_outer
8039 || relevance == vect_used_in_outer_by_reduction
8040 || relevance == vect_used_by_reduction
b28ead45
AH
8041 || relevance == vect_unused_in_scope
8042 || relevance == vect_used_only_live));
8644a673
IR
8043 break;
8044
8045 case vect_induction_def:
8046 case vect_constant_def:
8047 case vect_external_def:
8048 case vect_unknown_def_type:
8049 default:
8050 gcc_unreachable ();
8051 }
ebfd146a 8052
a70d6342
IR
8053 if (bb_vinfo)
8054 {
8055 gcc_assert (PURE_SLP_STMT (stmt_info));
8056
b690cc0f 8057 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 8058 if (dump_enabled_p ())
a70d6342 8059 {
78c60e3d
SS
8060 dump_printf_loc (MSG_NOTE, vect_location,
8061 "get vectype for scalar type: ");
8062 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 8063 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
8064 }
8065
8066 vectype = get_vectype_for_scalar_type (scalar_type);
8067 if (!vectype)
8068 {
73fbfcad 8069 if (dump_enabled_p ())
a70d6342 8070 {
78c60e3d
SS
8071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8072 "not SLPed: unsupported data-type ");
8073 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8074 scalar_type);
e645e942 8075 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
a70d6342
IR
8076 }
8077 return false;
8078 }
8079
73fbfcad 8080 if (dump_enabled_p ())
a70d6342 8081 {
78c60e3d
SS
8082 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8083 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 8084 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
8085 }
8086
8087 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8088 }
8089

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
          || vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_reduction (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
          || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
              || vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
              || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e., that are used outside the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  return true;
}
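
/* A minimal sketch of how a driver is expected to call vect_analyze_stmt
   (an illustrative assumption modelled loosely on the loop/SLP analysis
   callers, not a copy of them): walk every stmt of the region and fail
   as soon as one stmt cannot be handled.

     bool need_to_vectorize = false;
     for (gimple_stmt_iterator si = gsi_start_bb (bb);
          !gsi_end_p (si); gsi_next (&si))
       if (!vect_analyze_stmt (gsi_stmt (si), &need_to_vectorize, NULL))
         return false;
*/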

/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
        is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
                && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info)
             == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (slp_node)
    {
      gimple *slp_stmt;
      int i;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
        {
          stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
          if (STMT_VINFO_LIVE_P (slp_stmt_info)
              && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
            {
              done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
                                                  &vec_stmt);
              gcc_assert (done);
            }
        }
    }
  else if (STMT_VINFO_LIVE_P (stmt_info)
           && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
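
/* A usage sketch (hypothetical caller shape, loosely modelled on
   vect_transform_loop; "stmt_info" and "gsi" are assumed to be in
   scope): the return value says whether STMT was a store, and
   GROUPED_STORE tells the caller to remove the whole scalar chain
   via vect_remove_stores once the last store of the group has been
   transformed.

     bool grouped_store = false;
     bool is_store = vect_transform_stmt (stmt, &gsi, &grouped_store,
                                          NULL, NULL);
     if (is_store && grouped_store)
       vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
*/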

/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
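
/* A minimal pairing sketch (hypothetical caller): the info struct only
   becomes reachable through vinfo_for_stmt once it is registered, and
   free_stmt_vec_info below both frees it and clears the mapping.

     stmt_vec_info res = new_stmt_vec_info (stmt, vinfo);
     set_vinfo_for_stmt (stmt, res);
     ...
     free_stmt_vec_info (stmt);
*/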

/* Create the vector that maps statements to their stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector of stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
          gimple_set_bb (patt_stmt, NULL);
          tree lhs = gimple_get_lhs (patt_stmt);
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            release_ssa_name (lhs);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                {
                  gimple *seq_stmt = gsi_stmt (si);
                  gimple_set_bb (seq_stmt, NULL);
                  lhs = gimple_get_lhs (seq_stmt);
                  if (lhs && TREE_CODE (lhs) == SSA_NAME)
                    release_ssa_name (lhs);
                  free_stmt_vec_info (seq_stmt);
                }
            }
          free_stmt_vec_info (patt_stmt);
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     look up a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}
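
/* Worked example (illustrative assumption, not target-verified): for
   SCALAR_TYPE == int (SImode, nbytes == 4) and SIZE == 16 this asks for
   mode_for_vector (SImode, 4), i.e. V4SImode where available, and
   returns a four-element vector of int; with SIZE == 0 the target's
   preferred SIMD mode decides the width instead.

     tree v = get_vectype_for_scalar_type_and_size (integer_type_node, 16);
     if (v)
       gcc_assert (TYPE_VECTOR_SUBPARTS (v) == 4);
*/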

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of the specified SCALAR_TYPE as supported by the target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
                                  current_vector_size);
}
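
/* A minimal sketch of the intended relationship (hypothetical caller):
   the mask type tracks the element count of the comparison operands,
   not their element width.

     tree vectype  = get_vectype_for_scalar_type (integer_type_node);
     tree masktype = get_mask_type_for_scalar_type (integer_type_node);
     if (vectype && masktype)
       gcc_assert (TYPE_VECTOR_SUBPARTS (masktype)
                   == TYPE_VECTOR_SUBPARTS (vectype));
*/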

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of the same size
   as VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
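
/* Illustrative example (assuming 16-byte vectors on the target): given
   a V4SI VECTOR_TYPE, requesting float yields V4SF, since both types
   occupy the same number of bytes.

     tree v4si = get_vectype_for_scalar_type (integer_type_node);
     tree v4sf = v4si ? get_same_sized_vectype (float_type_node, v4si)
                      : NULL_TREE;
*/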

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
        {
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
          break;
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
          break;
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
          break;
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
          break;
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
          break;
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
          break;
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
          break;
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}
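
/* A minimal usage sketch (hypothetical, mirroring how the vectorizable_*
   routines validate their operands; "rhs" and "loop_vinfo" are assumed
   to be in scope):

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt))
       return false;
     if (dt == vect_constant_def || dt == vect_external_def)
       ;  // invariant operand: can be broadcast outside the loop
*/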

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                 vect1: [res1,res2,res3,res4],
                 vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                 vect1: [res1,res3,res5,res7],
                 vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) / 2
                == TYPE_VECTOR_SUBPARTS (wide_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode,
                                            TYPE_UNSIGNED (prev_type));

      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
                    == TYPE_VECTOR_SUBPARTS (wide_vectype)));

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
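
/* Worked example (illustrative, not target-verified): a QImode->SImode
   conversion takes one intermediate step, QI -> HI -> SI.  A successful
   call would then leave

     *code1 = VEC_UNPACK_LO_EXPR, *code2 = VEC_UNPACK_HI_EXPR,
     *multi_step_cvt = 1, and
     *interm_types = { the HImode vector type },

   i.e. the caller unpacks QI to HI with CODE1/CODE2 and then unpacks
   each HI half to SI using the intermediate type's optabs.  */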

/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    /* For scalar masks we may have different boolean
       vector types having the same QImode.  Thus we
       add additional check for elements number.  */
    return (!VECTOR_BOOLEAN_TYPE_P (vectype)
            || (TYPE_VECTOR_SUBPARTS (vectype) * 2
                == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (interm_optab, vec_mode))
             != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
        {
          intermediate_type
            = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
                                       current_vector_size);
          if (intermediate_mode != TYPE_MODE (intermediate_type))
            return false;
        }
      else
        intermediate_type
          = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return (!VECTOR_BOOLEAN_TYPE_P (vectype)
                || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
                    == TYPE_VECTOR_SUBPARTS (narrow_vectype)));

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
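
/* Worked example (illustrative, not target-verified): narrowing SImode
   elements to QImode packs twice, SI -> HI -> QI.  A successful call
   would leave *code1 = VEC_PACK_TRUNC_EXPR, *multi_step_cvt = 1 and
   *interm_types = { the HImode vector type }: the caller first packs
   pairs of SI vectors to HI and then packs the HI results to QI.  */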