/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
				stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
				misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
			  count, kind, stmt_info, misalign, where);
}

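/* For example (an illustrative call site, not one taken from this file):
   costing NCOPIES vector statements in the loop body defers the final
   accounting until BODY_COST_VEC is handed to the target via add_stmt_cost:

     unsigned estimate = record_stmt_cost (body_cost_vec, ncopies,
					   vector_stmt, stmt_info, 0,
					   vect_body);  */
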
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
		    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function is_simple_and_all_uses_invariant

   Return true if STMT is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
{
  tree op;
  gimple *def_stmt;
  ssa_op_iter iter;

  if (!is_gimple_assign (stmt))
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

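/* For instance (an illustrative loop, not taken from this file): in
   'for (i ...) a[i] = b[i] + 1;' the store to a[i] alters memory and is
   therefore relevant, while the increment of i is not; loop control is
   handled separately (see vect_mark_stmts_to_be_vectorized below).  */
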
static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
	&& !gimple_clobber_p (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)  */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
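
/* For example (illustrative): in the store 'a[i_1] = x_2' the use of x_2
   is a non-indexing use, whereas i_1 only feeds the address computation.  */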

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<gimple *> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
		      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
			 == vect_double_reduction_def) ?
		     vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, false);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, for which the
	 order of the results they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (TREE_CODE (op) == SSA_NAME
		      && !process_use (stmt, op, loop_vinfo, relevant,
				       &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL,
						 &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, relevant, &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

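/* A worked example (derived from the loop below): for a two-step demotion
   (PWR == 1) the body accounts 2^0 + 2^1 = 3 vec_promote_demote operations;
   a two-step promotion shifts each exponent by one, giving 2^1 + 2^2 = 6.  */
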
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For a grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple *first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

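  /* E.g. (illustrative): a group of 4 stores with NCOPIES == 1 is costed
     above as ceil_log2 (4) * 4 = 8 vec_perm operations.  */
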
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple *first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

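  /* E.g. (illustrative): a group of 8 loads with NCOPIES == 2 is costed
     above as 2 * ceil_log2 (8) * 8 = 48 vec_perm operations.  */
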
  /* The loads themselves.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);
  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (stmt, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else if (CONSTANT_CLASS_P (val))
	    val = fold_convert (TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type));
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		init_stmt = gimple_build_assign (new_temp,
						 fold_build1 (VIEW_CONVERT_EXPR,
							      TREE_TYPE (type),
							      val));
	      else
		init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}

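/* For instance (illustrative only): called with a scalar invariant VAL, a
   vector TYPE and GSI == NULL, vect_init_vector emits
     cst_N = { VAL, VAL, ... };
   in the loop preheader and returns the SSA name cst_N.  */
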
/* Function vect_get_vec_def_for_operand_1.

   For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
   DT that will be used in the vectorized stmt.  */

tree
vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  stmt_vec_info def_stmt_info = NULL;

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   the vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
    }

  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

      gcc_assert (vector_type);
      return vect_init_vector (stmt, op, vector_type, NULL);
    }
  else
    return vect_get_vec_def_for_operand_1 (def_stmt, dt);
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


1538/* Get vectorized definitions for the operands to create a copy of an original
ff802fa1 1539 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
ebfd146a
IR
1540
1541static void
b8698a0f 1542vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
9771b263
DN
1543 vec<tree> *vec_oprnds0,
1544 vec<tree> *vec_oprnds1)
ebfd146a 1545{
9771b263 1546 tree vec_oprnd = vec_oprnds0->pop ();
ebfd146a
IR
1547
1548 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
9771b263 1549 vec_oprnds0->quick_push (vec_oprnd);
ebfd146a 1550
9771b263 1551 if (vec_oprnds1 && vec_oprnds1->length ())
ebfd146a 1552 {
9771b263 1553 vec_oprnd = vec_oprnds1->pop ();
ebfd146a 1554 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
9771b263 1555 vec_oprnds1->quick_push (vec_oprnd);
ebfd146a
IR
1556 }
1557}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
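
/* For illustration only: vectorizing "z = x + y" without SLP starts from
   one definition per operand (a sketch; names hypothetical):

     vec<tree> vx = vNULL, vy = vNULL;
     vect_get_vec_defs (x, y, stmt, &vx, &vy, NULL, -1);

   after which vx[0] and vy[0] feed the first copy, and
   vect_get_vec_defs_for_stmt_copy (dt, &vx, &vy) refreshes both vectors
   for each further copy.  */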


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
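
/* For illustration only: if the iterator points at an existing store S
   whose virtual operands form the chain ".MEM_1 -> .MEM_2" and the new
   vec_stmt V is itself a store, the code above gives V a fresh vdef and
   rewires the chain to ".MEM_1 -> V -> S", so the SSA renamer never has
   to run over the function.  */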

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}
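
/* For illustration only: for a sqrtf call, cfn is CFN_SQRT and the direct
   internal function is IFN_SQRT; the query above then boils down to
   whether the target implements the sqrt optab for the vector mode, e.g.
   (type names hypothetical):

     if (vectorizable_internal_function (CFN_SQRT, fndecl, v4sf_type,
                                         v4sf_type) != IFN_LAST)
       ...an IFN_SQRT call on the vector operand can be emitted...  */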


static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);


/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree rhs_vectype = NULL_TREE;
  tree mask_vectype;
  tree elem_type;
  gimple *new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple *ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple *def_stmt;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);

  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
    return false;

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
    return false;

  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
      || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
        return false;
    }

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
                                          TYPE_MODE (mask_vectype),
                                          !is_store)
           || (rhs_vectype
               && !useless_type_conversion_p (vectype, rhs_vectype)))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /** Transform. **/

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
          for (i = 0; i < nunits; ++i)
            sel[i] = i | gather_off_nunits;
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
        }
      else
        gcc_unreachable ();
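
      /* For illustration only: with nunits == 4 and gather_off_nunits == 8,
         the WIDEN selector above is { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. odd
         copies gather using the high half of the offset vector.  With
         nunits == 8 and gather_off_nunits == 4, the NARROW selector is
         { 0, 1, 2, 3, 8, 9, 10, 11 }, concatenating the low halves of two
         successive gather results.  */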

      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                {
                  vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                                 scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              /* We should have caught mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
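          /* For illustration only: the IFN's second argument encodes the
             alignment that may be assumed.  With e.g. DR_MISALIGNMENT == 12
             and a 16-byte vectype, misalign & -misalign extracts the lowest
             set bit, 4, the largest power of two the address is still
             guaranteed to be aligned to.  */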
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                          ptr, vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
                                          ptr, vec_mask);
          gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}
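
/* For illustration only: the scalar input handled above is the
   if-converted form of a conditional access, e.g. for

     for (i = 0; i < n; i++)
       if (b[i])
         a[i] = x[i];

   if-conversion emits _1 = MASK_LOAD (&x[i], align, cond) and
   MASK_STORE (&a[i], align, cond, _1), and this function widens each of
   them into ncopies vector masked accesses.  */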

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
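
/* For illustration only: packing e.g. a V4DImode input into a V8SImode
   output is a single step, so a query like (type names hypothetical)

     tree_code code;
     if (simple_integer_narrowing (v8si_type, v4di_type, &code))
       ...code is the single pack operation to emit...

   succeeds, while a narrowing that needs two pack steps makes
   supportable_narrowing_operation report multi_step_cvt and the
   function return false.  */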

/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
                                         slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;
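
  /* For illustration only: a call taking V4DF operands and producing a
     V8SI result has nunits_in == nunits_out / 2, hence NARROW; equal
     element counts give NONE, and nunits_out == nunits_in / 2 gives
     WIDEN.  Any other ratio is rejected above.  */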

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  combined_fn cfn = gimple_call_combined_fn (stmt);
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
          || (modifier == NARROW
              && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                          vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
        fndecl = targetm.vectorize.builtin_vectorized_function
          (cfn, vectype_out, vectype_in);
      else
        fndecl = targetm.vectorize.builtin_md_vectorized_function
          (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  if (slp_node)
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                         "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
        add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
                       vec_promote_demote, stmt_info, 0, vect_body);

      return true;
    }

  /** Transform. **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  if (modifier == NARROW)
                    {
                      tree half_res = make_ssa_name (vectype_in);
                      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      gimple_call_set_lhs (new_stmt, half_res);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      if ((i & 1) == 0)
                        {
                          prev_res = half_res;
                          continue;
                        }
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, convert_code,
                                                      prev_res, half_res);
                    }
                  else
                    {
                      if (ifn != IFN_LAST)
                        new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      else
                        new_stmt = gimple_build_call_vec (fndecl, vargs);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
              gimple *init_stmt = gimple_build_assign (new_var, cst);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, new_var);
            }
          else if (modifier == NARROW)
            {
              tree half_res = make_ssa_name (vectype_in);
              new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (new_stmt, half_res);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if ((j & 1) == 0)
                {
                  prev_res = half_res;
                  continue;
                }
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, convert_code,
                                              prev_res, half_res);
            }
          else
            {
              if (ifn != IFN_LAST)
                new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              else
                new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else if (modifier == NARROW)
    {
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  if (ifn != IFN_LAST)
                    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                  else
                    new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace the
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);

  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
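
/* For illustration only: for a loop such as

     for (i = 0; i < n; i++)
       y[i] = sqrtf (x[i]);

   the code above first tries the internal-function route and only then a
   target builtin; in both cases the scalar call is replaced by ncopies
   vector calls chained through STMT_VINFO_RELATED_STMT, and the original
   statement is finally neutralized with a harmless zero assignment.  */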


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (is_gimple_call (def_stmt)
               && gimple_call_internal_p (def_stmt)
               && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
    }
}
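
/* For illustration only: the walk above recognizes addresses such as

     _2 = GOMP_SIMD_LANE (simduid.0);
     _3 = _2 * 4;
     p_4 = &array + _3;

   finding base == &array and linear_step == 4, i.e. the pointer advances
   by one 4-byte element per simd lane.  */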

/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  auto_vec<simd_call_arg_info> arginfo;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node)
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.reserve (nargs, true);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
                               &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
          thisarginfo.simd_lane_linear
            = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
               == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
          if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
         linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
          && !thisarginfo.linear_step
          && !vec_stmt
          && thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && loop_vinfo
          && !slp_node
          && TREE_CODE (op) == SSA_NAME)
        vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                       (n->simdclone->args[i].orig_type,
                        TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }
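
  /* For illustration only: with "#pragma omp declare simd" clones of
     simdlen 4 and 8 and a loop vectorization factor of 8, the scoring
     above charges the simdlen-4 clone (exact_log2 (8) - exact_log2 (4))
     * 1024 == 1024 for needing two calls per vector iteration, so the
     simdlen-8 clone wins unless targetm.simd_clone.usable rates it much
     worse.  */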
2994
2995 if (bestn == NULL)
00426f9a 2996 return false;
0136f8f0
AH
2997
2998 for (i = 0; i < nargs; i++)
2999 if ((arginfo[i].dt == vect_constant_def
3000 || arginfo[i].dt == vect_external_def)
3001 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3002 {
3003 arginfo[i].vectype
3004 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3005 i)));
3006 if (arginfo[i].vectype == NULL
3007 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3008 > bestn->simdclone->simdlen))
00426f9a 3009 return false;
0136f8f0
AH
3010 }
3011
3012 fndecl = bestn->decl;
3013 nunits = bestn->simdclone->simdlen;
3014 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3015
3016 /* If the function isn't const, only allow it in simd loops where user
3017 has asserted that at least nunits consecutive iterations can be
3018 performed using SIMD instructions. */
3019 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3020 && gimple_vuse (stmt))
00426f9a 3021 return false;
0136f8f0
AH
3022
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies >= 1);
3026
3027 if (!vec_stmt) /* transformation not required. */
3028 {
6c9e85fb
JJ
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3030 for (i = 0; i < nargs; i++)
7adb26f2
JJ
3031 if ((bestn->simdclone->args[i].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3033 || (bestn->simdclone->args[i].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
6c9e85fb 3035 {
17b658af 3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
6c9e85fb
JJ
3037 + 1);
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3039 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3040 ? size_type_node : TREE_TYPE (arginfo[i].op);
3041 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
17b658af
JJ
3043 tree sll = arginfo[i].simd_lane_linear
3044 ? boolean_true_node : boolean_false_node;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
6c9e85fb 3046 }
0136f8f0
AH
3047 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE, vect_location,
3050 "=== vectorizable_simd_clone_call ===\n");
3051/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
0136f8f0
AH
3052 return true;
3053 }
3054
3055 /** Transform. **/
3056
3057 if (dump_enabled_p ())
3058 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3059
3060 /* Handle def. */
3061 scalar_dest = gimple_call_lhs (stmt);
3062 vec_dest = NULL_TREE;
3063 rtype = NULL_TREE;
3064 ratype = NULL_TREE;
3065 if (scalar_dest)
3066 {
3067 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3068 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3069 if (TREE_CODE (rtype) == ARRAY_TYPE)
3070 {
3071 ratype = rtype;
3072 rtype = TREE_TYPE (ratype);
3073 }
3074 }
3075
3076 prev_stmt_info = NULL;
3077 for (j = 0; j < ncopies; ++j)
3078 {
3079 /* Build argument list for the vectorized call. */
3080 if (j == 0)
3081 vargs.create (nargs);
3082 else
3083 vargs.truncate (0);
3084
3085 for (i = 0; i < nargs; i++)
3086 {
3087 unsigned int k, l, m, o;
3088 tree atype;
3089 op = gimple_call_arg (stmt, i);
3090 switch (bestn->simdclone->args[i].arg_type)
3091 {
3092 case SIMD_CLONE_ARG_TYPE_VECTOR:
3093 atype = bestn->simdclone->args[i].vector_type;
3094 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3095 for (m = j * o; m < (j + 1) * o; m++)
3096 {
3097 if (TYPE_VECTOR_SUBPARTS (atype)
3098 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3099 {
3100 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3101 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3102 / TYPE_VECTOR_SUBPARTS (atype));
3103 gcc_assert ((k & (k - 1)) == 0);
3104 if (m == 0)
3105 vec_oprnd0
81c40241 3106 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3107 else
3108 {
3109 vec_oprnd0 = arginfo[i].op;
3110 if ((m & (k - 1)) == 0)
3111 vec_oprnd0
3112 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3113 vec_oprnd0);
3114 }
3115 arginfo[i].op = vec_oprnd0;
3116 vec_oprnd0
3117 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3118 size_int (prec),
3119 bitsize_int ((m & (k - 1)) * prec));
3120 new_stmt
b731b390 3121 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3122 vec_oprnd0);
3123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3124 vargs.safe_push (gimple_assign_lhs (new_stmt));
3125 }
3126 else
3127 {
3128 k = (TYPE_VECTOR_SUBPARTS (atype)
3129 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3130 gcc_assert ((k & (k - 1)) == 0);
3131 vec<constructor_elt, va_gc> *ctor_elts;
3132 if (k != 1)
3133 vec_alloc (ctor_elts, k);
3134 else
3135 ctor_elts = NULL;
3136 for (l = 0; l < k; l++)
3137 {
3138 if (m == 0 && l == 0)
3139 vec_oprnd0
81c40241 3140 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3141 else
3142 vec_oprnd0
3143 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3144 arginfo[i].op);
3145 arginfo[i].op = vec_oprnd0;
3146 if (k == 1)
3147 break;
3148 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3149 vec_oprnd0);
3150 }
3151 if (k == 1)
3152 vargs.safe_push (vec_oprnd0);
3153 else
3154 {
3155 vec_oprnd0 = build_constructor (atype, ctor_elts);
3156 new_stmt
b731b390 3157 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3158 vec_oprnd0);
3159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 vargs.safe_push (gimple_assign_lhs (new_stmt));
3161 }
3162 }
3163 }
3164 break;
3165 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3166 vargs.safe_push (op);
3167 break;
3168 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
7adb26f2 3169 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
0136f8f0
AH
3170 if (j == 0)
3171 {
3172 gimple_seq stmts;
3173 arginfo[i].op
3174 = force_gimple_operand (arginfo[i].op, &stmts, true,
3175 NULL_TREE);
3176 if (stmts != NULL)
3177 {
3178 basic_block new_bb;
3179 edge pe = loop_preheader_edge (loop);
3180 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3181 gcc_assert (!new_bb);
3182 }
17b658af
JJ
3183 if (arginfo[i].simd_lane_linear)
3184 {
3185 vargs.safe_push (arginfo[i].op);
3186 break;
3187 }
b731b390 3188 tree phi_res = copy_ssa_name (op);
538dd0b7 3189 gphi *new_phi = create_phi_node (phi_res, loop->header);
0136f8f0 3190 set_vinfo_for_stmt (new_phi,
310213d4 3191 new_stmt_vec_info (new_phi, loop_vinfo));
0136f8f0
AH
3192 add_phi_arg (new_phi, arginfo[i].op,
3193 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3194 enum tree_code code
3195 = POINTER_TYPE_P (TREE_TYPE (op))
3196 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3197 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3198 ? sizetype : TREE_TYPE (op);
3199 widest_int cst
3200 = wi::mul (bestn->simdclone->args[i].linear_step,
3201 ncopies * nunits);
3202 tree tcst = wide_int_to_tree (type, cst);
3203 tree phi_arg = copy_ssa_name (op);
3204 new_stmt
3205 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3206 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3207 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3208 set_vinfo_for_stmt (new_stmt,
3209 new_stmt_vec_info (new_stmt, loop_vinfo));
3210 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3211 UNKNOWN_LOCATION);
3212 arginfo[i].op = phi_res;
3213 vargs.safe_push (phi_res);
3214 }
3215 else
3216 {
3217 enum tree_code code
3218 = POINTER_TYPE_P (TREE_TYPE (op))
3219 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3220 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3221 ? sizetype : TREE_TYPE (op);
3222 widest_int cst
3223 = wi::mul (bestn->simdclone->args[i].linear_step,
3224 j * nunits);
3225 tree tcst = wide_int_to_tree (type, cst);
3226 new_temp = make_ssa_name (TREE_TYPE (op));
3227 new_stmt = gimple_build_assign (new_temp, code,
3228 arginfo[i].op, tcst);
3229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3230 vargs.safe_push (new_temp);
3231 }
3232 break;
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3239 default:
3240 gcc_unreachable ();
3241 }
3242 }
3243
3244 new_stmt = gimple_build_call_vec (fndecl, vargs);
3245 if (vec_dest)
3246 {
3247 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3248 if (ratype)
3249 new_temp = create_tmp_var (ratype);
3250 else if (TYPE_VECTOR_SUBPARTS (vectype)
3251 == TYPE_VECTOR_SUBPARTS (rtype))
3252 new_temp = make_ssa_name (vec_dest, new_stmt);
3253 else
3254 new_temp = make_ssa_name (rtype, new_stmt);
3255 gimple_call_set_lhs (new_stmt, new_temp);
3256 }
3257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3258
3259 if (vec_dest)
3260 {
3261 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3262 {
3263 unsigned int k, l;
3264 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3265 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3266 gcc_assert ((k & (k - 1)) == 0);
3267 for (l = 0; l < k; l++)
3268 {
3269 tree t;
3270 if (ratype)
3271 {
3272 t = build_fold_addr_expr (new_temp);
3273 t = build2 (MEM_REF, vectype, t,
3274 build_int_cst (TREE_TYPE (t),
3275 l * prec / BITS_PER_UNIT));
3276 }
3277 else
3278 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3279 size_int (prec), bitsize_int (l * prec));
3280 new_stmt
3281 = gimple_build_assign (make_ssa_name (vectype), t);
3282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3283 if (j == 0 && l == 0)
3284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3285 else
3286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3287
3288 prev_stmt_info = vinfo_for_stmt (new_stmt);
3289 }
3290
3291 if (ratype)
3292 {
3293 tree clobber = build_constructor (ratype, NULL);
3294 TREE_THIS_VOLATILE (clobber) = 1;
3295 new_stmt = gimple_build_assign (new_temp, clobber);
3296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3297 }
3298 continue;
3299 }
3300 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3301 {
3302 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3303 / TYPE_VECTOR_SUBPARTS (rtype));
3304 gcc_assert ((k & (k - 1)) == 0);
3305 if ((j & (k - 1)) == 0)
3306 vec_alloc (ret_ctor_elts, k);
3307 if (ratype)
3308 {
3309 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3310 for (m = 0; m < o; m++)
3311 {
3312 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3313 size_int (m), NULL_TREE, NULL_TREE);
3314 new_stmt
3315 = gimple_build_assign (make_ssa_name (rtype), tem);
3316 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3318 gimple_assign_lhs (new_stmt));
3319 }
3320 tree clobber = build_constructor (ratype, NULL);
3321 TREE_THIS_VOLATILE (clobber) = 1;
3322 new_stmt = gimple_build_assign (new_temp, clobber);
3323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3324 }
3325 else
3326 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3327 if ((j & (k - 1)) != k - 1)
3328 continue;
3329 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3330 new_stmt
3331 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3332 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3333
3334 if ((unsigned) j == k - 1)
3335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3336 else
3337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3338
3339 prev_stmt_info = vinfo_for_stmt (new_stmt);
3340 continue;
3341 }
3342 else if (ratype)
3343 {
3344 tree t = build_fold_addr_expr (new_temp);
3345 t = build2 (MEM_REF, vectype, t,
3346 build_int_cst (TREE_TYPE (t), 0));
3347 new_stmt
3348 = gimple_build_assign (make_ssa_name (vec_dest), t);
3349 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3350 tree clobber = build_constructor (ratype, NULL);
3351 TREE_THIS_VOLATILE (clobber) = 1;
3352 vect_finish_stmt_generation (stmt,
3353 gimple_build_assign (new_temp,
3354 clobber), gsi);
3355 }
3356 }
3357
3358 if (j == 0)
3359 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3360 else
3361 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3362
3363 prev_stmt_info = vinfo_for_stmt (new_stmt);
3364 }
3365
3366 vargs.release ();
3367
3368 /* The call in STMT might prevent it from being removed in DCE.
3369 We however cannot remove it here, due to the way the SSA name
3370 it defines is mapped to the new definition. So just replace
3371 the rhs of the statement with something harmless. */
3372
3373 if (slp_node)
3374 return true;
3375
3376 if (scalar_dest)
3377 {
3378 type = TREE_TYPE (scalar_dest);
3379 if (is_pattern_stmt_p (stmt_info))
3380 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3381 else
3382 lhs = gimple_call_lhs (stmt);
3383 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3384 }
3385 else
3386 new_stmt = gimple_build_nop ();
3387 set_vinfo_for_stmt (new_stmt, stmt_info);
3388 set_vinfo_for_stmt (stmt, NULL);
3389 STMT_VINFO_STMT (stmt_info) = new_stmt;
3390 gsi_replace (gsi, new_stmt, true);
3391 unlink_stmt_vdef (stmt);
3392
3393 return true;
3394}
3395
3396
3397/* Function vect_gen_widened_results_half
3398
3399 Create a vector stmt whose code, type, number of arguments, and result
3400 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3401 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3402 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3403 needs to be created (DECL is a function-decl of a target-builtin).
3404 STMT is the original scalar stmt that we are vectorizing. */
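/* Illustrative sketch (not part of the original sources): for a widening
   multiply of V8HI operands into V4SI results this function is called
   twice, once per half, emitting for example
     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vop0, vop1>;
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vop0, vop1>;
   with CODE being the LO resp. HI tree code chosen earlier by
   supportable_widening_operation.  */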
3405
3406 static gimple *
3407vect_gen_widened_results_half (enum tree_code code,
3408 tree decl,
3409 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3410 tree vec_dest, gimple_stmt_iterator *gsi,
3411 gimple *stmt)
3412 {
3413 gimple *new_stmt;
3414 tree new_temp;
3415
3416 /* Generate half of the widened result: */
3417 if (code == CALL_EXPR)
3418 {
3419 /* Target specific support */
3420 if (op_type == binary_op)
3421 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3422 else
3423 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3424 new_temp = make_ssa_name (vec_dest, new_stmt);
3425 gimple_call_set_lhs (new_stmt, new_temp);
3426 }
3427 else
3428 {
3429 /* Generic support */
3430 gcc_assert (op_type == TREE_CODE_LENGTH (code));
ebfd146a
IR
3431 if (op_type != binary_op)
3432 vec_oprnd1 = NULL;
3433 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3434 new_temp = make_ssa_name (vec_dest, new_stmt);
3435 gimple_assign_set_lhs (new_stmt, new_temp);
3436 }
3437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3438
3439 return new_stmt;
3440}
3441
3442
3443/* Get vectorized definitions for loop-based vectorization. For the first
3444 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3445 the scalar operand), and for the rest we get a copy with
3446 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3447 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3448 The vectors are collected into VEC_OPRNDS. */
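/* A sketch of the resulting contents (not from the original sources):
   with MULTI_STEP_CVT == 1 four defs end up in VEC_OPRNDS -- the first
   obtained from the scalar operand, the next three as successive stmt
   copies of the previous vector definition.  */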
3449
3450static void
3451 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3452 vec<tree> *vec_oprnds, int multi_step_cvt)
3453{
3454 tree vec_oprnd;
3455
3456 /* Get first vector operand. */
3457 /* All the vector operands except the very first one (that is scalar oprnd)
3458 are stmt copies. */
3459 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3460 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3461 else
3462 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3463
3464 vec_oprnds->quick_push (vec_oprnd);
3465
3466 /* Get second vector operand. */
3467 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3468 vec_oprnds->quick_push (vec_oprnd);
3469
3470 *oprnd = vec_oprnd;
3471
3472 /* For conversion in multiple steps, continue to get operands
3473 recursively. */
3474 if (multi_step_cvt)
3475 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3476}
3477
3478
3479/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3480 For multi-step conversions store the resulting vectors and call the function
3481 recursively. */
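/* Illustrative sketch (not part of the original sources): one level of
   demotion packs each pair of operands into one narrower vector, e.g.
     new_tmp = VEC_PACK_TRUNC_EXPR <vop0, vop1>;
   so four V4SI operands become two V8HI vectors, which a further level
   can pack into one V16QI vector.  */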
3482
3483static void
3484 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3485 int multi_step_cvt, gimple *stmt,
3486 vec<tree> vec_dsts,
3487 gimple_stmt_iterator *gsi,
3488 slp_tree slp_node, enum tree_code code,
3489 stmt_vec_info *prev_stmt_info)
3490{
3491 unsigned int i;
3492 tree vop0, vop1, new_tmp, vec_dest;
3493 gimple *new_stmt;
3494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3495
3496 vec_dest = vec_dsts.pop ();
3497
3498 for (i = 0; i < vec_oprnds->length (); i += 2)
3499 {
3500 /* Create demotion operation. */
3501 vop0 = (*vec_oprnds)[i];
3502 vop1 = (*vec_oprnds)[i + 1];
3503 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3504 new_tmp = make_ssa_name (vec_dest, new_stmt);
3505 gimple_assign_set_lhs (new_stmt, new_tmp);
3506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3507
3508 if (multi_step_cvt)
3509 /* Store the resulting vector for next recursive call. */
3510 (*vec_oprnds)[i/2] = new_tmp;
3511 else
3512 {
3513 /* This is the last step of the conversion sequence. Store the
3514 vectors in SLP_NODE or in vector info of the scalar statement
3515 (or in STMT_VINFO_RELATED_STMT chain). */
3516 if (slp_node)
3517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3518 else
3519 {
3520 if (!*prev_stmt_info)
3521 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3522 else
3523 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3524
3525 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3526 }
3527 }
3528 }
3529
3530 /* For multi-step demotion operations we first generate demotion operations
3531 from the source type to the intermediate types, and then combine the
3532 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3533 type. */
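/* For example (a sketch, not from the original sources): with
   MULTI_STEP_CVT == 1, eight V4SI operands are first packed into four
   V8HI vectors; the recursive call then packs those into two V16QI
   result vectors.  */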
3534 if (multi_step_cvt)
3535 {
3536 /* At each level of recursion we have half of the operands we had at the
3537 previous level. */
3538 vec_oprnds->truncate ((i+1)/2);
3539 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3540 stmt, vec_dsts, gsi, slp_node,
3541 VEC_PACK_TRUNC_EXPR,
3542 prev_stmt_info);
3543 }
3544
3545 vec_dsts.quick_push (vec_dest);
3546}
3547
3548
3549/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3550 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3551 the resulting vectors and call the function recursively. */
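/* Illustrative sketch (not part of the original sources): each level of
   promotion turns one operand vector into a LO and a HI half, e.g.
     vect_lo = VEC_UNPACK_LO_EXPR <vop0>;
     vect_hi = VEC_UNPACK_HI_EXPR <vop0>;
   doubling the number of vectors in VEC_OPRNDS0 per step, so one V16QI
   operand widens to two V8HI vectors and then to four V4SI vectors.  */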
3552
3553static void
3554vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3555 vec<tree> *vec_oprnds1,
3556 gimple *stmt, tree vec_dest,
3557 gimple_stmt_iterator *gsi,
3558 enum tree_code code1,
3559 enum tree_code code2, tree decl1,
3560 tree decl2, int op_type)
3561{
3562 int i;
3563 tree vop0, vop1, new_tmp1, new_tmp2;
3564 gimple *new_stmt1, *new_stmt2;
3565 vec<tree> vec_tmp = vNULL;
3566
3567 vec_tmp.create (vec_oprnds0->length () * 2);
3568 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3569 {
3570 if (op_type == binary_op)
3571 vop1 = (*vec_oprnds1)[i];
3572 else
3573 vop1 = NULL_TREE;
3574
3575 /* Generate the two halves of promotion operation. */
3576 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3577 op_type, vec_dest, gsi, stmt);
3578 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3579 op_type, vec_dest, gsi, stmt);
3580 if (is_gimple_call (new_stmt1))
3581 {
3582 new_tmp1 = gimple_call_lhs (new_stmt1);
3583 new_tmp2 = gimple_call_lhs (new_stmt2);
3584 }
3585 else
3586 {
3587 new_tmp1 = gimple_assign_lhs (new_stmt1);
3588 new_tmp2 = gimple_assign_lhs (new_stmt2);
3589 }
3590
3591 /* Store the results for the next step. */
3592 vec_tmp.quick_push (new_tmp1);
3593 vec_tmp.quick_push (new_tmp2);
3594 }
3595
3596 vec_oprnds0->release ();
3597 *vec_oprnds0 = vec_tmp;
3598}
3599
3600
3601/* Check if STMT performs a conversion operation that can be vectorized.
3602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3603 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
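/* A worked sizing example (a sketch, not from the original sources):
   converting V4SI ints to doubles gives nunits_in == 4 and
   nunits_out == 2 for a V2DF result, so the modifier below is WIDEN and
   each V4SI operand yields two V2DF result vectors.  */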
3605
3606static bool
3607vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3608 gimple **vec_stmt, slp_tree slp_node)
3609{
3610 tree vec_dest;
3611 tree scalar_dest;
3612 tree op0, op1 = NULL_TREE;
3613 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3615 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3616 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3617 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3618 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3619 tree new_temp;
3620 gimple *def_stmt;
3621 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3622 gimple *new_stmt = NULL;
3623 stmt_vec_info prev_stmt_info;
3624 int nunits_in;
3625 int nunits_out;
3626 tree vectype_out, vectype_in;
3627 int ncopies, i, j;
3628 tree lhs_type, rhs_type;
3629 enum { NARROW, NONE, WIDEN } modifier;
3630 vec<tree> vec_oprnds0 = vNULL;
3631 vec<tree> vec_oprnds1 = vNULL;
3632 tree vop0;
3633 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3634 vec_info *vinfo = stmt_info->vinfo;
3635 int multi_step_cvt = 0;
3636 vec<tree> vec_dsts = vNULL;
3637 vec<tree> interm_types = vNULL;
3638 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3639 int op_type;
3640 machine_mode rhs_mode;
3641 unsigned short fltsz;
3642
3643 /* Is STMT a vectorizable conversion? */
3644
3645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3646 return false;
3647
3648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3649 && ! vec_stmt)
3650 return false;
3651
3652 if (!is_gimple_assign (stmt))
3653 return false;
3654
3655 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3656 return false;
3657
3658 code = gimple_assign_rhs_code (stmt);
3659 if (!CONVERT_EXPR_CODE_P (code)
3660 && code != FIX_TRUNC_EXPR
3661 && code != FLOAT_EXPR
3662 && code != WIDEN_MULT_EXPR
3663 && code != WIDEN_LSHIFT_EXPR)
3664 return false;
3665
3666 op_type = TREE_CODE_LENGTH (code);
3667
ebfd146a 3668 /* Check types of lhs and rhs. */
3669 scalar_dest = gimple_assign_lhs (stmt);
3670 lhs_type = TREE_TYPE (scalar_dest);
3671 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3672
3673 op0 = gimple_assign_rhs1 (stmt);
3674 rhs_type = TREE_TYPE (op0);
3675
3676 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3677 && !((INTEGRAL_TYPE_P (lhs_type)
3678 && INTEGRAL_TYPE_P (rhs_type))
3679 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3680 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3681 return false;
3682
3683 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3684 && ((INTEGRAL_TYPE_P (lhs_type)
3685 && (TYPE_PRECISION (lhs_type)
3686 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3687 || (INTEGRAL_TYPE_P (rhs_type)
3688 && (TYPE_PRECISION (rhs_type)
3689 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3690 {
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3693 "type conversion to/from bit-precision unsupported."
3694 "\n");
3695 return false;
3696 }
3697
3698 /* Check the operands of the operation. */
3699 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3700 {
3701 if (dump_enabled_p ())
3702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3703 "use not simple.\n");
3704 return false;
3705 }
3706 if (op_type == binary_op)
3707 {
3708 bool ok;
3709
3710 op1 = gimple_assign_rhs2 (stmt);
3711 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3712 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3713 OP1. */
3714 if (CONSTANT_CLASS_P (op0))
3715 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3716 else
3717 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3718
3719 if (!ok)
3720 {
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3723 "use not simple.\n");
3724 return false;
3725 }
3726 }
3727
3728 /* If op0 is an external or constant defs use a vector type of
3729 the same size as the output vector type. */
3730 if (!vectype_in)
3731 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3732 if (vec_stmt)
3733 gcc_assert (vectype_in);
3734 if (!vectype_in)
3735 {
3736 if (dump_enabled_p ())
3737 {
3738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3739 "no vectype for scalar type ");
3740 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3741 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3742 }
3743
3744 return false;
3745 }
3746
3747 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3748 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3749 {
3750 if (dump_enabled_p ())
3751 {
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3753 "can't convert between boolean and non "
3754 "boolean vectors");
3755 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3756 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3757 }
3758
3759 return false;
3760 }
3761
3762 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3763 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3764 if (nunits_in < nunits_out)
3765 modifier = NARROW;
3766 else if (nunits_out == nunits_in)
3767 modifier = NONE;
3768 else
3769 modifier = WIDEN;
3770
3771 /* Multiple types in SLP are handled by creating the appropriate number of
3772 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3773 case of SLP. */
3774 if (slp_node)
3775 ncopies = 1;
3776 else if (modifier == NARROW)
3777 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3778 else
3779 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3780
3781 /* Sanity check: make sure that at least one copy of the vectorized stmt
3782 needs to be generated. */
3783 gcc_assert (ncopies >= 1);
3784
3785 /* Supportable by target? */
3786 switch (modifier)
3787 {
3788 case NONE:
3789 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3790 return false;
3791 if (supportable_convert_operation (code, vectype_out, vectype_in,
3792 &decl1, &code1))
3793 break;
3794 /* FALLTHRU */
3795 unsupported:
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3798 "conversion not supported by target.\n");
3799 return false;
3800
3801 case WIDEN:
3802 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3803 &code1, &code2, &multi_step_cvt,
3804 &interm_types))
3805 {
3806 /* Binary widening operation can only be supported directly by the
3807 architecture. */
3808 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3809 break;
3810 }
3811
3812 if (code != FLOAT_EXPR
3813 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3814 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3815 goto unsupported;
3816
3817 rhs_mode = TYPE_MODE (rhs_type);
3818 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3819 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3820 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3821 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3822 {
3823 cvt_type
3824 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3825 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3826 if (cvt_type == NULL_TREE)
3827 goto unsupported;
3828
3829 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3830 {
3831 if (!supportable_convert_operation (code, vectype_out,
3832 cvt_type, &decl1, &codecvt1))
3833 goto unsupported;
3834 }
3835 else if (!supportable_widening_operation (code, stmt, vectype_out,
3836 cvt_type, &codecvt1,
3837 &codecvt2, &multi_step_cvt,
3838 &interm_types))
3839 continue;
3840 else
3841 gcc_assert (multi_step_cvt == 0);
3842
3843 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3844 vectype_in, &code1, &code2,
3845 &multi_step_cvt, &interm_types))
3846 break;
3847 }
3848
3849 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3850 goto unsupported;
3851
3852 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3853 codecvt2 = ERROR_MARK;
3854 else
3855 {
3856 multi_step_cvt++;
3857 interm_types.safe_push (cvt_type);
3858 cvt_type = NULL_TREE;
3859 }
3860 break;
3861
3862 case NARROW:
3863 gcc_assert (op_type == unary_op);
3864 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3865 &code1, &multi_step_cvt,
3866 &interm_types))
3867 break;
3868
3869 if (code != FIX_TRUNC_EXPR
3870 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3871 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3872 goto unsupported;
3873
3874 rhs_mode = TYPE_MODE (rhs_type);
3875 cvt_type
3876 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3877 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3878 if (cvt_type == NULL_TREE)
3879 goto unsupported;
3880 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3881 &decl1, &codecvt1))
3882 goto unsupported;
3883 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3884 &code1, &multi_step_cvt,
3885 &interm_types))
3886 break;
3887 goto unsupported;
3888
3889 default:
3890 gcc_unreachable ();
3891 }
3892
3893 if (!vec_stmt) /* transformation not required. */
3894 {
3895 if (dump_enabled_p ())
3896 dump_printf_loc (MSG_NOTE, vect_location,
3897 "=== vectorizable_conversion ===\n");
3898 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3899 {
3900 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3901 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3902 }
3903 else if (modifier == NARROW)
3904 {
3905 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3906 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3907 }
3908 else
3909 {
3910 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3911 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3912 }
3913 interm_types.release ();
3914 return true;
3915 }
3916
3917 /** Transform. **/
3918 if (dump_enabled_p ())
3919 dump_printf_loc (MSG_NOTE, vect_location,
3920 "transform conversion. ncopies = %d.\n", ncopies);
3921
3922 if (op_type == binary_op)
3923 {
3924 if (CONSTANT_CLASS_P (op0))
3925 op0 = fold_convert (TREE_TYPE (op1), op0);
3926 else if (CONSTANT_CLASS_P (op1))
3927 op1 = fold_convert (TREE_TYPE (op0), op1);
3928 }
3929
3930 /* In case of multi-step conversion, we first generate conversion operations
3931 to the intermediate types, and then from those types to the final one.
3932 We create vector destinations for the intermediate type (TYPES) received
3933 from supportable_*_operation, and store them in the correct order
3934 for future use in vect_create_vectorized_*_stmts (). */
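/* Ordering sketch (not part of the original sources): for a V4SI -> V16QI
   narrowing with one intermediate type V8HI, the V16QI destination is
   pushed first and the V8HI destination second, so the first demotion
   level pops the V8HI destination and the recursive level pops V16QI.  */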
3935 vec_dsts.create (multi_step_cvt + 1);
3936 vec_dest = vect_create_destination_var (scalar_dest,
3937 (cvt_type && modifier == WIDEN)
3938 ? cvt_type : vectype_out);
3939 vec_dsts.quick_push (vec_dest);
3940
3941 if (multi_step_cvt)
3942 {
3943 for (i = interm_types.length () - 1;
3944 interm_types.iterate (i, &intermediate_type); i--)
3945 {
3946 vec_dest = vect_create_destination_var (scalar_dest,
3947 intermediate_type);
3948 vec_dsts.quick_push (vec_dest);
3949 }
3950 }
3951
3952 if (cvt_type)
3953 vec_dest = vect_create_destination_var (scalar_dest,
3954 modifier == WIDEN
3955 ? vectype_out : cvt_type);
3956
3957 if (!slp_node)
3958 {
3959 if (modifier == WIDEN)
3960 {
3961 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3962 if (op_type == binary_op)
3963 vec_oprnds1.create (1);
3964 }
3965 else if (modifier == NARROW)
3966 vec_oprnds0.create (
3967 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3968 }
3969 else if (code == WIDEN_LSHIFT_EXPR)
3970 vec_oprnds1.create (slp_node->vec_stmts_size);
3971
3972 last_oprnd = op0;
3973 prev_stmt_info = NULL;
3974 switch (modifier)
3975 {
3976 case NONE:
3977 for (j = 0; j < ncopies; j++)
3978 {
3979 if (j == 0)
3980 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3981 -1);
3982 else
3983 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3984
3985 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3986 {
3987 /* Arguments are ready, create the new vector stmt. */
3988 if (code1 == CALL_EXPR)
3989 {
3990 new_stmt = gimple_build_call (decl1, 1, vop0);
3991 new_temp = make_ssa_name (vec_dest, new_stmt);
3992 gimple_call_set_lhs (new_stmt, new_temp);
3993 }
3994 else
3995 {
3996 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3997 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3998 new_temp = make_ssa_name (vec_dest, new_stmt);
3999 gimple_assign_set_lhs (new_stmt, new_temp);
4000 }
4001
4002 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4003 if (slp_node)
4004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4005 else
4006 {
4007 if (!prev_stmt_info)
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4009 else
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
4012 }
4013 }
4014 }
4015 break;
4016
4017 case WIDEN:
4018 /* In case the vectorization factor (VF) is bigger than the number
4019 of elements that we can fit in a vectype (nunits), we have to
4020 generate more than one vector stmt - i.e., we need to "unroll"
4021 the vector stmt by a factor VF/nunits. */
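/* A concrete sizing sketch (not from the original sources): with VF == 8
   and nunits == 4, ncopies == 2, and the j-loop below runs twice,
   emitting one widened group of vector stmts per iteration.  */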
4022 for (j = 0; j < ncopies; j++)
4023 {
4024 /* Handle uses. */
4025 if (j == 0)
4026 {
4027 if (slp_node)
4028 {
4029 if (code == WIDEN_LSHIFT_EXPR)
4030 {
4031 unsigned int k;
4032
4033 vec_oprnd1 = op1;
4034 /* Store vec_oprnd1 for every vector stmt to be created
4035 for SLP_NODE. We check during the analysis that all
4036 the shift arguments are the same. */
4037 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4038 vec_oprnds1.quick_push (vec_oprnd1);
4039
4040 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4041 slp_node, -1);
4042 }
4043 else
4044 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4045 &vec_oprnds1, slp_node, -1);
4046 }
4047 else
4048 {
4049 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4050 vec_oprnds0.quick_push (vec_oprnd0);
4051 if (op_type == binary_op)
4052 {
4053 if (code == WIDEN_LSHIFT_EXPR)
4054 vec_oprnd1 = op1;
4055 else
4056 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4057 vec_oprnds1.quick_push (vec_oprnd1);
4058 }
4059 }
4060 }
4061 else
4062 {
4063 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4064 vec_oprnds0.truncate (0);
4065 vec_oprnds0.quick_push (vec_oprnd0);
4066 if (op_type == binary_op)
4067 {
4068 if (code == WIDEN_LSHIFT_EXPR)
4069 vec_oprnd1 = op1;
4070 else
4071 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4072 vec_oprnd1);
4073 vec_oprnds1.truncate (0);
4074 vec_oprnds1.quick_push (vec_oprnd1);
4075 }
4076 }
4077
4078 /* Arguments are ready. Create the new vector stmts. */
4079 for (i = multi_step_cvt; i >= 0; i--)
4080 {
4081 tree this_dest = vec_dsts[i];
4082 enum tree_code c1 = code1, c2 = code2;
4083 if (i == 0 && codecvt2 != ERROR_MARK)
4084 {
4085 c1 = codecvt1;
4086 c2 = codecvt2;
4087 }
4088 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4089 &vec_oprnds1,
4090 stmt, this_dest, gsi,
4091 c1, c2, decl1, decl2,
4092 op_type);
4093 }
4094
4095 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4096 {
4097 if (cvt_type)
4098 {
4099 if (codecvt1 == CALL_EXPR)
4100 {
4101 new_stmt = gimple_build_call (decl1, 1, vop0);
4102 new_temp = make_ssa_name (vec_dest, new_stmt);
4103 gimple_call_set_lhs (new_stmt, new_temp);
4104 }
4105 else
4106 {
4107 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4108 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4109 new_stmt = gimple_build_assign (new_temp, codecvt1,
4110 vop0);
4a00c761
JJ
4111 }
4112
4113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4114 }
4115 else
4116 new_stmt = SSA_NAME_DEF_STMT (vop0);
4117
4118 if (slp_node)
4119 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4120 else
4121 {
4122 if (!prev_stmt_info)
4123 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4124 else
4125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126 prev_stmt_info = vinfo_for_stmt (new_stmt);
4127 }
4128 }
4129 }
4130
4131 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4132 break;
4133
4134 case NARROW:
4135 /* In case the vectorization factor (VF) is bigger than the number
4136 of elements that we can fit in a vectype (nunits), we have to
4137 generate more than one vector stmt - i.e., we need to "unroll"
4138 the vector stmt by a factor VF/nunits. */
4139 for (j = 0; j < ncopies; j++)
4140 {
4141 /* Handle uses. */
4142 if (slp_node)
4143 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4144 slp_node, -1);
4145 else
4146 {
4147 vec_oprnds0.truncate (0);
4148 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4149 vect_pow2 (multi_step_cvt) - 1);
4150 }
4151
4152 /* Arguments are ready. Create the new vector stmts. */
4153 if (cvt_type)
4154 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4155 {
4156 if (codecvt1 == CALL_EXPR)
4157 {
4158 new_stmt = gimple_build_call (decl1, 1, vop0);
4159 new_temp = make_ssa_name (vec_dest, new_stmt);
4160 gimple_call_set_lhs (new_stmt, new_temp);
4161 }
4162 else
4163 {
4164 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
b731b390 4165 new_temp = make_ssa_name (vec_dest);
0d0e4a03
JJ
4166 new_stmt = gimple_build_assign (new_temp, codecvt1,
4167 vop0);
4a00c761 4168 }
ebfd146a 4169
4a00c761 4170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9771b263 4171 vec_oprnds0[i] = new_temp;
4a00c761 4172 }
ebfd146a 4173
4a00c761
JJ
4174 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4175 stmt, vec_dsts, gsi,
4176 slp_node, code1,
4177 &prev_stmt_info);
4178 }
4179
4180 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4181 break;
4182 }
4183
4184 vec_oprnds0.release ();
4185 vec_oprnds1.release ();
4186 vec_dsts.release ();
4187 interm_types.release ();
4188
4189 return true;
4190}
4191
4192
4193/* Function vectorizable_assignment.
4194
b8698a0f
L
4195 Check if STMT performs an assignment (copy) that can be vectorized.
4196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
4197 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4199
4200static bool
4201vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4202 gimple **vec_stmt, slp_tree slp_node)
4203{
4204 tree vec_dest;
4205 tree scalar_dest;
4206 tree op;
4207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4209 tree new_temp;
4210 gimple *def_stmt;
4211 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4212 int ncopies;
4213 int i, j;
4214 vec<tree> vec_oprnds = vNULL;
4215 tree vop;
4216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4217 vec_info *vinfo = stmt_info->vinfo;
4218 gimple *new_stmt = NULL;
4219 stmt_vec_info prev_stmt_info = NULL;
4220 enum tree_code code;
4221 tree vectype_in;
4222
4223 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4224 return false;
4225
4226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4227 && ! vec_stmt)
4228 return false;
4229
4230 /* Is vectorizable assignment? */
4231 if (!is_gimple_assign (stmt))
4232 return false;
4233
4234 scalar_dest = gimple_assign_lhs (stmt);
4235 if (TREE_CODE (scalar_dest) != SSA_NAME)
4236 return false;
4237
4238 code = gimple_assign_rhs_code (stmt);
4239 if (gimple_assign_single_p (stmt)
4240 || code == PAREN_EXPR
4241 || CONVERT_EXPR_CODE_P (code))
4242 op = gimple_assign_rhs1 (stmt);
4243 else
4244 return false;
4245
4246 if (code == VIEW_CONVERT_EXPR)
4247 op = TREE_OPERAND (op, 0);
4248
4249 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4250 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4251
4252 /* Multiple types in SLP are handled by creating the appropriate number of
4253 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4254 case of SLP. */
4255 if (slp_node)
4256 ncopies = 1;
4257 else
4258 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4259
4260 gcc_assert (ncopies >= 1);
4261
4262 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4263 {
4264 if (dump_enabled_p ())
4265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4266 "use not simple.\n");
4267 return false;
4268 }
4269
4270 /* We can handle NOP_EXPR conversions that do not change the number
4271 of elements or the vector size. */
4272 if ((CONVERT_EXPR_CODE_P (code)
4273 || code == VIEW_CONVERT_EXPR)
4274 && (!vectype_in
4275 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4276 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4277 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4278 return false;
4279
4280 /* We do not handle bit-precision changes. */
4281 if ((CONVERT_EXPR_CODE_P (code)
4282 || code == VIEW_CONVERT_EXPR)
4283 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4284 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4285 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4286 || ((TYPE_PRECISION (TREE_TYPE (op))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4288 /* But a conversion that does not change the bit-pattern is ok. */
4289 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4290 > TYPE_PRECISION (TREE_TYPE (op)))
4291 && TYPE_UNSIGNED (TREE_TYPE (op)))
4292 /* Conversion between boolean types of different sizes is
4293 a simple assignment in case their vectypes are same
4294 boolean vectors. */
4295 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4296 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
7b7b1813 4297 {
73fbfcad 4298 if (dump_enabled_p ())
78c60e3d
SS
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "type conversion to/from bit-precision "
e645e942 4301 "unsupported.\n");
7b7b1813
RG
4302 return false;
4303 }
4304
ebfd146a
IR
4305 if (!vec_stmt) /* transformation not required. */
4306 {
4307 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE, vect_location,
4310 "=== vectorizable_assignment ===\n");
4311 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4312 return true;
4313 }
4314
4315 /** Transform. **/
4316 if (dump_enabled_p ())
4317 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4318
4319 /* Handle def. */
4320 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4321
4322 /* Handle use. */
4323 for (j = 0; j < ncopies; j++)
4324 {
4325 /* Handle uses. */
4326 if (j == 0)
4327 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4328 else
4329 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4330
4331 /* Arguments are ready. Create the new vector stmt. */
4332 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4333 {
4334 if (CONVERT_EXPR_CODE_P (code)
4335 || code == VIEW_CONVERT_EXPR)
4336 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4337 new_stmt = gimple_build_assign (vec_dest, vop);
4338 new_temp = make_ssa_name (vec_dest, new_stmt);
4339 gimple_assign_set_lhs (new_stmt, new_temp);
4340 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4341 if (slp_node)
4342 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4343 }
4344
4345 if (slp_node)
4346 continue;
4347
4348 if (j == 0)
4349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4350 else
4351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4352
4353 prev_stmt_info = vinfo_for_stmt (new_stmt);
4354 }
4355
4356 vec_oprnds.release ();
4357 return true;
4358}
4359
4360
4361/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4362 either as shift by a scalar or by a vector. */
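/* Usage sketch (not part of the original sources):
     if (vect_supportable_shift (LSHIFT_EXPR, short_integer_type_node))
       ...  // some vector form of a left shift on shorts exists
   A TRUE result only says one of the two optab flavors is available;
   the caller still chooses between the scalar- and vector-shift forms. */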
4363
4364bool
4365vect_supportable_shift (enum tree_code code, tree scalar_type)
4366{
4367
ef4bddc2 4368 machine_mode vec_mode;
1107f3ae
IR
4369 optab optab;
4370 int icode;
4371 tree vectype;
4372
4373 vectype = get_vectype_for_scalar_type (scalar_type);
4374 if (!vectype)
4375 return false;
4376
4377 optab = optab_for_tree_code (code, vectype, optab_scalar);
4378 if (!optab
4379 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4380 {
4381 optab = optab_for_tree_code (code, vectype, optab_vector);
4382 if (!optab
4383 || (optab_handler (optab, TYPE_MODE (vectype))
4384 == CODE_FOR_nothing))
4385 return false;
4386 }
4387
4388 vec_mode = TYPE_MODE (vectype);
4389 icode = (int) optab_handler (optab, vec_mode);
4390 if (icode == CODE_FOR_nothing)
4391 return false;
4392
4393 return true;
4394}
4395
4396
4397/* Function vectorizable_shift.
4398
4399 Check if STMT performs a shift operation that can be vectorized.
4400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4401 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
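/* Decision sketch (not from the original sources): for
     x[i] = y[i] << 3;       // invariant amount: vector/scalar shift
     x[i] = y[i] << z[i];    // per-lane amount:  vector/vector shift
   the first form prefers optab_scalar, the second requires
   optab_vector; the analysis below picks between them.  */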
4403
4404static bool
4405vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4406 gimple **vec_stmt, slp_tree slp_node)
4407{
4408 tree vec_dest;
4409 tree scalar_dest;
4410 tree op0, op1 = NULL;
4411 tree vec_oprnd1 = NULL_TREE;
4412 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4413 tree vectype;
4414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4415 enum tree_code code;
4416 machine_mode vec_mode;
4417 tree new_temp;
4418 optab optab;
4419 int icode;
4420 machine_mode optab_op2_mode;
4421 gimple *def_stmt;
4422 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4423 gimple *new_stmt = NULL;
4424 stmt_vec_info prev_stmt_info;
4425 int nunits_in;
4426 int nunits_out;
4427 tree vectype_out;
4428 tree op1_vectype;
4429 int ncopies;
4430 int j, i;
4431 vec<tree> vec_oprnds0 = vNULL;
4432 vec<tree> vec_oprnds1 = vNULL;
4433 tree vop0, vop1;
4434 unsigned int k;
4435 bool scalar_shift_arg = true;
4436 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4437 vec_info *vinfo = stmt_info->vinfo;
4438 int vf;
4439
4440 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4441 return false;
4442
4443 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4444 && ! vec_stmt)
4445 return false;
4446
4447 /* Is STMT a vectorizable binary/unary operation? */
4448 if (!is_gimple_assign (stmt))
4449 return false;
4450
4451 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4452 return false;
4453
4454 code = gimple_assign_rhs_code (stmt);
4455
4456 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4457 || code == RROTATE_EXPR))
4458 return false;
4459
4460 scalar_dest = gimple_assign_lhs (stmt);
4461 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4462 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4463 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4464 {
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4467 "bit-precision shifts not supported.\n");
4468 return false;
4469 }
9dc3f7de
IR
4470
4471 op0 = gimple_assign_rhs1 (stmt);
81c40241 4472 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
9dc3f7de 4473 {
73fbfcad 4474 if (dump_enabled_p ())
78c60e3d 4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4476 "use not simple.\n");
9dc3f7de
IR
4477 return false;
4478 }
4479 /* If op0 is an external or constant def use a vector type with
4480 the same size as the output vector type. */
4481 if (!vectype)
4482 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4483 if (vec_stmt)
4484 gcc_assert (vectype);
4485 if (!vectype)
4486 {
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4489 "no vectype for scalar type\n");
4490 return false;
4491 }
4492
4493 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4494 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4495 if (nunits_out != nunits_in)
4496 return false;
4497
4498 op1 = gimple_assign_rhs2 (stmt);
4499 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4500 {
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4503 "use not simple.\n");
4504 return false;
4505 }
4506
4507 if (loop_vinfo)
4508 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4509 else
4510 vf = 1;
4511
4512 /* Multiple types in SLP are handled by creating the appropriate number of
4513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4514 case of SLP. */
4515 if (slp_node)
4516 ncopies = 1;
4517 else
4518 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4519
4520 gcc_assert (ncopies >= 1);
4521
4522 /* Determine whether the shift amount is a vector, or scalar. If the
4523 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4524
4525 if ((dt[1] == vect_internal_def
4526 || dt[1] == vect_induction_def)
4527 && !slp_node)
4528 scalar_shift_arg = false;
4529 else if (dt[1] == vect_constant_def
4530 || dt[1] == vect_external_def
4531 || dt[1] == vect_internal_def)
4532 {
4533 /* In SLP, need to check whether the shift count is the same,
4534 in loops if it is a constant or invariant, it is always
4535 a scalar shift. */
4536 if (slp_node)
4537 {
4538 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4539 gimple *slpstmt;
4540
4541 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4542 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4543 scalar_shift_arg = false;
4544 }
4545
4546 /* If the shift amount is computed by a pattern stmt we cannot
4547 use the scalar amount directly thus give up and use a vector
4548 shift. */
4549 if (dt[1] == vect_internal_def)
4550 {
4551 gimple *def = SSA_NAME_DEF_STMT (op1);
4552 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4553 scalar_shift_arg = false;
4554 }
4555 }
4556 else
4557 {
4558 if (dump_enabled_p ())
4559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4560 "operand mode requires invariant argument.\n");
4561 return false;
4562 }
4563
4564 /* Vector shifted by vector. */
4565 if (!scalar_shift_arg)
4566 {
4567 optab = optab_for_tree_code (code, vectype, optab_vector);
4568 if (dump_enabled_p ())
4569 dump_printf_loc (MSG_NOTE, vect_location,
4570 "vector/vector shift/rotate found.\n");
4571
4572 if (!op1_vectype)
4573 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4574 if (op1_vectype == NULL_TREE
4575 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4576 {
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4579 "unusable type for last operand in"
4580 " vector/vector shift/rotate.\n");
4581 return false;
4582 }
4583 }
4584 /* See if the machine has a vector shifted by scalar insn and if not
4585 then see if it has a vector shifted by vector insn. */
4586 else
4587 {
4588 optab = optab_for_tree_code (code, vectype, optab_scalar);
4589 if (optab
4590 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4591 {
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_NOTE, vect_location,
4594 "vector/scalar shift/rotate found.\n");
4595 }
4596 else
4597 {
4598 optab = optab_for_tree_code (code, vectype, optab_vector);
4599 if (optab
4600 && (optab_handler (optab, TYPE_MODE (vectype))
4601 != CODE_FOR_nothing))
4602 {
4603 scalar_shift_arg = false;
4604
4605 if (dump_enabled_p ())
4606 dump_printf_loc (MSG_NOTE, vect_location,
4607 "vector/vector shift/rotate found.\n");
4608
4609 /* Unlike the other binary operators, shifts/rotates have
4610 the rhs being int, instead of the same type as the lhs,
4611 so make sure the scalar is the right type if we are
4612 dealing with vectors of long long/long/short/char. */
4613 if (dt[1] == vect_constant_def)
4614 op1 = fold_convert (TREE_TYPE (vectype), op1);
4615 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4616 TREE_TYPE (op1)))
4617 {
4618 if (slp_node
4619 && TYPE_MODE (TREE_TYPE (vectype))
4620 != TYPE_MODE (TREE_TYPE (op1)))
4621 {
4622 if (dump_enabled_p ())
4623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4624 "unusable type for last operand in"
4625 " vector/vector shift/rotate.\n");
4626 return false;
4627 }
4628 if (vec_stmt && !slp_node)
4629 {
4630 op1 = fold_convert (TREE_TYPE (vectype), op1);
4631 op1 = vect_init_vector (stmt, op1,
4632 TREE_TYPE (vectype), NULL);
4633 }
4634 }
9dc3f7de
IR
4635 }
4636 }
4637 }
9dc3f7de
IR
4638
4639 /* Supportable by target? */
4640 if (!optab)
4641 {
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "no optab.\n");
4645 return false;
4646 }
4647 vec_mode = TYPE_MODE (vectype);
4648 icode = (int) optab_handler (optab, vec_mode);
4649 if (icode == CODE_FOR_nothing)
4650 {
4651 if (dump_enabled_p ())
4652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4653 "op not supported by target.\n");
4654 /* Check only during analysis. */
4655 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4656 || (vf < vect_min_worthwhile_factor (code)
4657 && !vec_stmt))
4658 return false;
4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_NOTE, vect_location,
4661 "proceeding using word mode.\n");
4662 }
4663
4664 /* Worthwhile without SIMD support? Check only during analysis. */
4665 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4666 && vf < vect_min_worthwhile_factor (code)
4667 && !vec_stmt)
4668 {
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4671 "not worthwhile without SIMD support.\n");
4672 return false;
4673 }
4674
4675 if (!vec_stmt) /* transformation not required. */
4676 {
4677 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE, vect_location,
4680 "=== vectorizable_shift ===\n");
4681 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4682 return true;
4683 }
4684
4685 /** Transform. **/
4686
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_NOTE, vect_location,
4689 "transform binary/unary operation.\n");
4690
4691 /* Handle def. */
4692 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4693
4694 prev_stmt_info = NULL;
4695 for (j = 0; j < ncopies; j++)
4696 {
4697 /* Handle uses. */
4698 if (j == 0)
4699 {
4700 if (scalar_shift_arg)
4701 {
4702 /* Vector shl and shr insn patterns can be defined with scalar
4703 operand 2 (shift operand). In this case, use constant or loop
4704 invariant op1 directly, without extending it to vector mode
4705 first. */
4706 optab_op2_mode = insn_data[icode].operand[2].mode;
4707 if (!VECTOR_MODE_P (optab_op2_mode))
4708 {
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_NOTE, vect_location,
4711 "operand 1 using scalar mode.\n");
4712 vec_oprnd1 = op1;
4713 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4714 vec_oprnds1.quick_push (vec_oprnd1);
4715 if (slp_node)
4716 {
4717 /* Store vec_oprnd1 for every vector stmt to be created
4718 for SLP_NODE. We check during the analysis that all
4719 the shift arguments are the same.
4720 TODO: Allow different constants for different vector
4721 stmts generated for an SLP instance. */
4722 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4723 vec_oprnds1.quick_push (vec_oprnd1);
4724 }
4725 }
4726 }
4727
4728 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4729 (a special case for certain kind of vector shifts); otherwise,
4730 operand 1 should be of a vector type (the usual case). */
4731 if (vec_oprnd1)
4732 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4733 slp_node, -1);
4734 else
4735 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4736 slp_node, -1);
4737 }
4738 else
4739 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4740
4741 /* Arguments are ready. Create the new vector stmt. */
4742 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4743 {
4744 vop1 = vec_oprnds1[i];
4745 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4746 new_temp = make_ssa_name (vec_dest, new_stmt);
4747 gimple_assign_set_lhs (new_stmt, new_temp);
4748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4749 if (slp_node)
4750 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4751 }
4752
4753 if (slp_node)
4754 continue;
4755
4756 if (j == 0)
4757 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4758 else
4759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4760 prev_stmt_info = vinfo_for_stmt (new_stmt);
4761 }
4762
4763 vec_oprnds0.release ();
4764 vec_oprnds1.release ();
4765
4766 return true;
4767}
4768
4769
4770/* Function vectorizable_operation.
4771
4772 Check if STMT performs a binary, unary or ternary operation that can
4773 be vectorized.
4774 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4775 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4776 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
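/* Illustrative sketch (not part of the original sources): for a binary op
     a[i] = b[i] + c[i];
   the transform below emits, per copy,
     vect_a = vop_b + vop_c;
   ternary ops (e.g. FMA_EXPR) are handled the same way with a third
   vectorized operand.  */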
4777
4778static bool
4779vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4780 gimple **vec_stmt, slp_tree slp_node)
4781{
4782 tree vec_dest;
4783 tree scalar_dest;
4784 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4785 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4786 tree vectype;
4787 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4788 enum tree_code code;
4789 machine_mode vec_mode;
4790 tree new_temp;
4791 int op_type;
4792 optab optab;
4793 bool target_support_p;
4794 gimple *def_stmt;
4795 enum vect_def_type dt[3]
4796 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4797 gimple *new_stmt = NULL;
4798 stmt_vec_info prev_stmt_info;
4799 int nunits_in;
4800 int nunits_out;
4801 tree vectype_out;
4802 int ncopies;
4803 int j, i;
4804 vec<tree> vec_oprnds0 = vNULL;
4805 vec<tree> vec_oprnds1 = vNULL;
4806 vec<tree> vec_oprnds2 = vNULL;
4807 tree vop0, vop1, vop2;
4808 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4809 vec_info *vinfo = stmt_info->vinfo;
4810 int vf;
4811
4812 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4813 return false;
4814
4815 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4816 && ! vec_stmt)
4817 return false;
4818
4819 /* Is STMT a vectorizable binary/unary operation? */
4820 if (!is_gimple_assign (stmt))
4821 return false;
4822
4823 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4824 return false;
4825
4826 code = gimple_assign_rhs_code (stmt);
4827
4828 /* For pointer addition, we should use the normal plus for
4829 the vector addition. */
4830 if (code == POINTER_PLUS_EXPR)
4831 code = PLUS_EXPR;
4832
 4833 /* Support only unary, binary or ternary operations. */
4834 op_type = TREE_CODE_LENGTH (code);
16949072 4835 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 4836 {
73fbfcad 4837 if (dump_enabled_p ())
78c60e3d 4838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4839 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 4840 op_type);
4841 return false;
4842 }
4843
4844 scalar_dest = gimple_assign_lhs (stmt);
4845 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4846
4847 /* Most operations cannot handle bit-precision types without extra
4848 truncations. */
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4851 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
 4852 /* Exceptions are bitwise binary operations. */
4853 && code != BIT_IOR_EXPR
4854 && code != BIT_XOR_EXPR
4855 && code != BIT_AND_EXPR)
4856 {
73fbfcad 4857 if (dump_enabled_p ())
78c60e3d 4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4859 "bit-precision arithmetic not supported.\n");
4860 return false;
4861 }
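  /* An illustrative case (not from the original sources): a C bit-field

       struct { unsigned int x : 29; } s;
       ... s.x + 1 ...

     has 29-bit precision, while the vector lanes would be full 32-bit
     SImode lanes, so every vector operation would need an extra
     truncation to keep the excess bits clean -- hence the bail-out
     above.  IOR, XOR and AND never set bits outside their operands and
     are therefore exempt.  */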
4862
ebfd146a 4863 op0 = gimple_assign_rhs1 (stmt);
81c40241 4864 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
ebfd146a 4865 {
73fbfcad 4866 if (dump_enabled_p ())
78c60e3d 4867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4868 "use not simple.\n");
4869 return false;
4870 }
4871 /* If op0 is an external or constant def use a vector type with
4872 the same size as the output vector type. */
4873 if (!vectype)
4874 {
 4875 /* For a boolean type we cannot determine the vectype from an
 4876 invariant value (we don't know whether it is a vector
 4877 of booleans or a vector of integers). We use the output
 4878 vectype because operations on booleans don't change the
 4879 type. */
4880 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4881 {
4882 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4883 {
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4886 "not supported operation on bool value.\n");
4887 return false;
4888 }
4889 vectype = vectype_out;
4890 }
4891 else
4892 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4893 }
4894 if (vec_stmt)
4895 gcc_assert (vectype);
4896 if (!vectype)
4897 {
73fbfcad 4898 if (dump_enabled_p ())
7d8930a0 4899 {
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "no vectype for scalar type ");
4902 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4903 TREE_TYPE (op0));
e645e942 4904 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4905 }
4906
4907 return false;
4908 }
4909
4910 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4912 if (nunits_out != nunits_in)
4913 return false;
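  /* Operations whose input and output vectors have different numbers
     of elements (widening/narrowing) are deliberately not handled
     here; they are the business of vectorizable_conversion.  */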
ebfd146a 4914
16949072 4915 if (op_type == binary_op || op_type == ternary_op)
4916 {
4917 op1 = gimple_assign_rhs2 (stmt);
81c40241 4918 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
ebfd146a 4919 {
73fbfcad 4920 if (dump_enabled_p ())
78c60e3d 4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4922 "use not simple.\n");
4923 return false;
4924 }
4925 }
4926 if (op_type == ternary_op)
4927 {
4928 op2 = gimple_assign_rhs3 (stmt);
81c40241 4929 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
16949072 4930 {
73fbfcad 4931 if (dump_enabled_p ())
78c60e3d 4932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4933 "use not simple.\n");
4934 return false;
4935 }
4936 }
ebfd146a 4937
4938 if (loop_vinfo)
4939 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4940 else
4941 vf = 1;
4942
4943 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 4944 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f 4945 case of SLP. */
fce57248 4946 if (slp_node)
4947 ncopies = 1;
4948 else
4949 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4950
4951 gcc_assert (ncopies >= 1);
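  /* For example, with a vectorization factor of 8 and V4SI operands
     (nunits_in == 4), each scalar stmt is replicated into
     ncopies == 8 / 4 == 2 vector stmts.  */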
4952
9dc3f7de 4953 /* Shifts are handled in vectorizable_shift (). */
4954 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4955 || code == RROTATE_EXPR)
9dc3f7de 4956 return false;
ebfd146a 4957
ebfd146a 4958 /* Supportable by target? */
4959
4960 vec_mode = TYPE_MODE (vectype);
4961 if (code == MULT_HIGHPART_EXPR)
523ba738 4962 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4963 else
4964 {
4965 optab = optab_for_tree_code (code, vectype, optab_default);
4966 if (!optab)
5deb57cb 4967 {
73fbfcad 4968 if (dump_enabled_p ())
78c60e3d 4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4970 "no optab.\n");
00f07b86 4971 return false;
5deb57cb 4972 }
4973 target_support_p = (optab_handler (optab, vec_mode)
4974 != CODE_FOR_nothing);
4975 }
4976
523ba738 4977 if (!target_support_p)
ebfd146a 4978 {
73fbfcad 4979 if (dump_enabled_p ())
78c60e3d 4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4981 "op not supported by target.\n");
4982 /* Check only during analysis. */
4983 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5deb57cb 4984 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
ebfd146a 4985 return false;
73fbfcad 4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_NOTE, vect_location,
4988 "proceeding using word mode.\n");
4989 }
4990
4a00c761 4991 /* Worthwhile without SIMD support? Check only during analysis. */
4992 if (!VECTOR_MODE_P (vec_mode)
4993 && !vec_stmt
4994 && vf < vect_min_worthwhile_factor (code))
7d8930a0 4995 {
73fbfcad 4996 if (dump_enabled_p ())
78c60e3d 4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4998 "not worthwhile without SIMD support.\n");
e34842c6 4999 return false;
7d8930a0 5000 }
ebfd146a 5001
5002 if (!vec_stmt) /* transformation not required. */
5003 {
4a00c761 5004 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
73fbfcad 5005 if (dump_enabled_p ())
78c60e3d 5006 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5007 "=== vectorizable_operation ===\n");
c3e7ee41 5008 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5009 return true;
5010 }
5011
5012 /** Transform. **/
5013
73fbfcad 5014 if (dump_enabled_p ())
78c60e3d 5015 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5016 "transform binary/unary operation.\n");
383d9c83 5017
ebfd146a 5018 /* Handle def. */
00f07b86 5019 vec_dest = vect_create_destination_var (scalar_dest, vectype);
b8698a0f 5020
5021 /* In case the vectorization factor (VF) is bigger than the number
5022 of elements that we can fit in a vectype (nunits), we have to generate
 5023 more than one vector stmt - i.e. - we need to "unroll" the
5024 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5025 from one copy of the vector stmt to the next, in the field
5026 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5027 stages to find the correct vector defs to be used when vectorizing
5028 stmts that use the defs of the current stmt. The example below
5029 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5030 we need to create 4 vectorized stmts):
5031
5032 before vectorization:
5033 RELATED_STMT VEC_STMT
5034 S1: x = memref - -
5035 S2: z = x + 1 - -
5036
5037 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5038 there):
5039 RELATED_STMT VEC_STMT
5040 VS1_0: vx0 = memref0 VS1_1 -
5041 VS1_1: vx1 = memref1 VS1_2 -
5042 VS1_2: vx2 = memref2 VS1_3 -
5043 VS1_3: vx3 = memref3 - -
5044 S1: x = load - VS1_0
5045 S2: z = x + 1 - -
5046
5047 step2: vectorize stmt S2 (done here):
5048 To vectorize stmt S2 we first need to find the relevant vector
5049 def for the first operand 'x'. This is, as usual, obtained from
5050 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5051 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5052 relevant vector def 'vx0'. Having found 'vx0' we can generate
5053 the vector stmt VS2_0, and as usual, record it in the
5054 STMT_VINFO_VEC_STMT of stmt S2.
5055 When creating the second copy (VS2_1), we obtain the relevant vector
5056 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5057 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5058 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5059 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5060 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5061 chain of stmts and pointers:
5062 RELATED_STMT VEC_STMT
5063 VS1_0: vx0 = memref0 VS1_1 -
5064 VS1_1: vx1 = memref1 VS1_2 -
5065 VS1_2: vx2 = memref2 VS1_3 -
5066 VS1_3: vx3 = memref3 - -
5067 S1: x = load - VS1_0
5068 VS2_0: vz0 = vx0 + v1 VS2_1 -
5069 VS2_1: vz1 = vx1 + v1 VS2_2 -
5070 VS2_2: vz2 = vx2 + v1 VS2_3 -
5071 VS2_3: vz3 = vx3 + v1 - -
5072 S2: z = x + 1 - VS2_0 */
5073
5074 prev_stmt_info = NULL;
5075 for (j = 0; j < ncopies; j++)
5076 {
5077 /* Handle uses. */
5078 if (j == 0)
5079 {
5080 if (op_type == binary_op || op_type == ternary_op)
5081 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5082 slp_node, -1);
5083 else
5084 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5085 slp_node, -1);
5086 if (op_type == ternary_op)
36ba4aae 5087 {
5088 vec_oprnds2.create (1);
5089 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
81c40241 5090 stmt));
36ba4aae 5091 }
4a00c761 5092 }
ebfd146a 5093 else
5094 {
5095 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5096 if (op_type == ternary_op)
5097 {
5098 tree vec_oprnd = vec_oprnds2.pop ();
5099 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5100 vec_oprnd));
5101 }
5102 }
5103
5104 /* Arguments are ready. Create the new vector stmt. */
9771b263 5105 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
ebfd146a 5106 {
4a00c761 5107 vop1 = ((op_type == binary_op || op_type == ternary_op)
9771b263 5108 ? vec_oprnds1[i] : NULL_TREE);
4a00c761 5109 vop2 = ((op_type == ternary_op)
9771b263 5110 ? vec_oprnds2[i] : NULL_TREE);
0d0e4a03 5111 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5112 new_temp = make_ssa_name (vec_dest, new_stmt);
5113 gimple_assign_set_lhs (new_stmt, new_temp);
5114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5115 if (slp_node)
9771b263 5116 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5117 }
5118
5119 if (slp_node)
5120 continue;
5121
5122 if (j == 0)
5123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5124 else
5125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5126 prev_stmt_info = vinfo_for_stmt (new_stmt);
5127 }
5128
5129 vec_oprnds0.release ();
5130 vec_oprnds1.release ();
5131 vec_oprnds2.release ();
ebfd146a 5132
5133 return true;
5134}
5135
5136/* A helper function to ensure data reference DR's base alignment
5137 for STMT_INFO. */
5138
5139static void
5140ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5141{
5142 if (!dr->aux)
5143 return;
5144
52639a61 5145 if (DR_VECT_AUX (dr)->base_misaligned)
5146 {
5147 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
52639a61 5148 tree base_decl = DR_VECT_AUX (dr)->base_decl;
c716e67f 5149
5150 if (decl_in_symtab_p (base_decl))
5151 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5152 else
5153 {
fe37c7af 5154 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5155 DECL_USER_ALIGN (base_decl) = 1;
5156 }
52639a61 5157 DR_VECT_AUX (dr)->base_misaligned = false;
5158 }
5159}
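/* For example (illustrative): if the vectorizer wants aligned V4SF
   accesses to a global float array that is only 4-byte aligned, the
   code above raises DECL_ALIGN of the base decl to TYPE_ALIGN (V4SF),
   going through the symbol table for decls that live there, and sets
   DECL_USER_ALIGN so the increased alignment is not lowered again
   later.  */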
5160
ebfd146a 5161
5162/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5163 reversal of the vector elements. If that is impossible to do,
5164 returns NULL. */
5165
5166static tree
5167perm_mask_for_reverse (tree vectype)
5168{
5169 int i, nunits;
5170 unsigned char *sel;
5171
5172 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5173 sel = XALLOCAVEC (unsigned char, nunits);
5174
5175 for (i = 0; i < nunits; ++i)
5176 sel[i] = nunits - 1 - i;
5177
5178 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5179 return NULL_TREE;
5180 return vect_gen_perm_mask_checked (vectype, sel);
5181}
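/* E.g. for V4SI the selector built above is {3, 2, 1, 0}, so the
   returned mask makes

     VEC_PERM_EXPR <v, v, {3, 2, 1, 0}>

   yield {v[3], v[2], v[1], v[0]}.  */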
5182
5183/* Function vectorizable_store.
5184
 5185 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5186 can be vectorized.
5187 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5188 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5190
5191static bool
355fe088 5192vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 5193 slp_tree slp_node)
5194{
5195 tree scalar_dest;
5196 tree data_ref;
5197 tree op;
5198 tree vec_oprnd = NULL_TREE;
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 5201 tree elem_type;
ebfd146a 5202 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 5203 struct loop *loop = NULL;
ef4bddc2 5204 machine_mode vec_mode;
5205 tree dummy;
5206 enum dr_alignment_support alignment_support_scheme;
355fe088 5207 gimple *def_stmt;
5208 enum vect_def_type dt;
5209 stmt_vec_info prev_stmt_info = NULL;
5210 tree dataref_ptr = NULL_TREE;
74bf76ed 5211 tree dataref_offset = NULL_TREE;
355fe088 5212 gimple *ptr_incr = NULL;
5213 int ncopies;
5214 int j;
355fe088 5215 gimple *next_stmt, *first_stmt = NULL;
0d0293ac 5216 bool grouped_store = false;
272c6793 5217 bool store_lanes_p = false;
ebfd146a 5218 unsigned int group_size, i;
5219 vec<tree> dr_chain = vNULL;
5220 vec<tree> oprnds = vNULL;
5221 vec<tree> result_chain = vNULL;
ebfd146a 5222 bool inv_p;
5223 bool negative = false;
5224 tree offset = NULL_TREE;
6e1aa848 5225 vec<tree> vec_oprnds = vNULL;
ebfd146a 5226 bool slp = (slp_node != NULL);
ebfd146a 5227 unsigned int vec_num;
a70d6342 5228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
310213d4 5229 vec_info *vinfo = stmt_info->vinfo;
272c6793 5230 tree aggr_type;
5231 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5232 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5233 int scatter_scale = 1;
5234 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5235 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
355fe088 5236 gimple *new_stmt;
a70d6342 5237
a70d6342 5238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5239 return false;
5240
5241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5242 && ! vec_stmt)
5243 return false;
5244
5245 /* Is vectorizable store? */
5246
5247 if (!is_gimple_assign (stmt))
5248 return false;
5249
5250 scalar_dest = gimple_assign_lhs (stmt);
5251 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5252 && is_pattern_stmt_p (stmt_info))
5253 scalar_dest = TREE_OPERAND (scalar_dest, 0);
ebfd146a 5254 if (TREE_CODE (scalar_dest) != ARRAY_REF
38000232 5255 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
ebfd146a 5256 && TREE_CODE (scalar_dest) != INDIRECT_REF
5257 && TREE_CODE (scalar_dest) != COMPONENT_REF
5258 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5259 && TREE_CODE (scalar_dest) != REALPART_EXPR
5260 && TREE_CODE (scalar_dest) != MEM_REF)
5261 return false;
5262
5263 /* Cannot have hybrid store SLP -- that would mean storing to the
5264 same location twice. */
5265 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5266
ebfd146a 5267 gcc_assert (gimple_assign_single_p (stmt));
465c8c19 5268
f4d09712 5269 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5270 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5271
5272 if (loop_vinfo)
5273 loop = LOOP_VINFO_LOOP (loop_vinfo);
5274
5275 /* Multiple types in SLP are handled by creating the appropriate number of
5276 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5277 case of SLP. */
fce57248 5278 if (slp)
465c8c19
JJ
5279 ncopies = 1;
5280 else
5281 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5282
5283 gcc_assert (ncopies >= 1);
5284
5285 /* FORNOW. This restriction should be relaxed. */
5286 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5287 {
5288 if (dump_enabled_p ())
5289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5290 "multiple types in nested loop.\n");
5291 return false;
5292 }
5293
ebfd146a 5294 op = gimple_assign_rhs1 (stmt);
5295
5296 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
ebfd146a 5297 {
73fbfcad 5298 if (dump_enabled_p ())
78c60e3d 5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5300 "use not simple.\n");
5301 return false;
5302 }
5303
5304 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5305 return false;
5306
272c6793 5307 elem_type = TREE_TYPE (vectype);
ebfd146a 5308 vec_mode = TYPE_MODE (vectype);
7b7b1813 5309
5310 /* FORNOW. In some cases can vectorize even if data-type not supported
5311 (e.g. - array initialization with 0). */
947131ba 5312 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5313 return false;
5314
5315 if (!STMT_VINFO_DATA_REF (stmt_info))
5316 return false;
5317
f2e2a985 5318 if (!STMT_VINFO_STRIDED_P (stmt_info))
09dfa495 5319 {
5320 negative =
5321 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5322 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5323 size_zero_node) < 0;
5324 if (negative && ncopies > 1)
5325 {
5326 if (dump_enabled_p ())
5327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
f2e2a985 5328 "multiple types with negative step.\n");
5329 return false;
5330 }
f2e2a985 5331 if (negative)
09dfa495 5332 {
5333 gcc_assert (!grouped_store);
5334 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5335 if (alignment_support_scheme != dr_aligned
5336 && alignment_support_scheme != dr_unaligned_supported)
5337 {
5338 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5340 "negative step but alignment required.\n");
5341 return false;
5342 }
5343 if (dt != vect_constant_def
5344 && dt != vect_external_def
5345 && !perm_mask_for_reverse (vectype))
5346 {
5347 if (dump_enabled_p ())
5348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5349 "negative step and reversing not supported.\n");
5350 return false;
5351 }
5352 }
5353 }
5354
0d0293ac 5355 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 5356 {
0d0293ac 5357 grouped_store = true;
e14c1050 5358 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
cee62fee 5359 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
fce57248 5360 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
b602d918 5361 {
5362 if (vect_store_lanes_supported (vectype, group_size))
5363 store_lanes_p = true;
0d0293ac 5364 else if (!vect_grouped_store_supported (vectype, group_size))
5365 return false;
5366 }
b8698a0f 5367
cee62fee 5368 if (STMT_VINFO_STRIDED_P (stmt_info)
fce57248 5369 && slp
5370 && (group_size > nunits
5371 || nunits % group_size != 0))
5372 {
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5374 "unhandled strided group store\n");
5375 return false;
5376 }
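  /* E.g. a group of three strided V4SI stores is rejected above:
     4 % 3 != 0, so a vector could not be split into a whole number
     of group-sized pieces by the strided-store lowering further
     below.  */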
5377
5378 if (first_stmt == stmt)
5379 {
5380 /* STMT is the leader of the group. Check the operands of all the
5381 stmts of the group. */
e14c1050 5382 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5383 while (next_stmt)
5384 {
5385 gcc_assert (gimple_assign_single_p (next_stmt));
5386 op = gimple_assign_rhs1 (next_stmt);
81c40241 5387 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
ebfd146a 5388 {
73fbfcad 5389 if (dump_enabled_p ())
78c60e3d 5390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5391 "use not simple.\n");
5392 return false;
5393 }
e14c1050 5394 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5395 }
5396 }
5397 }
5398
5399 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5400 {
355fe088 5401 gimple *def_stmt;
5402 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5403 &scatter_off, &scatter_scale);
5404 gcc_assert (scatter_decl);
5405 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5406 &scatter_off_vectype))
5407 {
5408 if (dump_enabled_p ())
5409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5410 "scatter index use not simple.");
5411 return false;
5412 }
5413 }
5414
5415 if (!vec_stmt) /* transformation not required. */
5416 {
5417 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5418 /* The SLP costs are calculated during SLP analysis. */
5419 if (!PURE_SLP_STMT (stmt_info))
5420 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5421 NULL, NULL, NULL);
5422 return true;
5423 }
5424
5425 /** Transform. **/
5426
5427 ensure_base_align (stmt_info, dr);
5428
5429 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5430 {
5431 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5432 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5433 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5434 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5435 edge pe = loop_preheader_edge (loop);
5436 gimple_seq seq;
5437 basic_block new_bb;
5438 enum { NARROW, NONE, WIDEN } modifier;
5439 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5440
5441 if (nunits == (unsigned int) scatter_off_nunits)
5442 modifier = NONE;
5443 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5444 {
5445 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5446 modifier = WIDEN;
5447
5448 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5449 sel[i] = i | nunits;
5450
5451 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5452 gcc_assert (perm_mask != NULL_TREE);
5453 }
5454 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5455 {
5456 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5457 modifier = NARROW;
5458
5459 for (i = 0; i < (unsigned int) nunits; ++i)
5460 sel[i] = i | scatter_off_nunits;
5461
5462 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5463 gcc_assert (perm_mask != NULL_TREE);
5464 ncopies *= 2;
5465 }
5466 else
5467 gcc_unreachable ();
5468
5469 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5470 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5471 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5472 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5473 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5474 scaletype = TREE_VALUE (arglist);
5475
5476 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5477 && TREE_CODE (rettype) == VOID_TYPE);
5478
5479 ptr = fold_convert (ptrtype, scatter_base);
5480 if (!is_gimple_min_invariant (ptr))
5481 {
5482 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5483 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5484 gcc_assert (!new_bb);
5485 }
5486
5487 /* Currently we support only unconditional scatter stores,
5488 so mask should be all ones. */
5489 mask = build_int_cst (masktype, -1);
5490 mask = vect_init_vector (stmt, mask, masktype, NULL);
5491
5492 scale = build_int_cst (scaletype, scatter_scale);
5493
5494 prev_stmt_info = NULL;
5495 for (j = 0; j < ncopies; ++j)
5496 {
5497 if (j == 0)
5498 {
5499 src = vec_oprnd1
81c40241 5500 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
3bab6342 5501 op = vec_oprnd0
81c40241 5502 = vect_get_vec_def_for_operand (scatter_off, stmt);
5503 }
5504 else if (modifier != NONE && (j & 1))
5505 {
5506 if (modifier == WIDEN)
5507 {
5508 src = vec_oprnd1
5509 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5510 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5511 stmt, gsi);
5512 }
5513 else if (modifier == NARROW)
5514 {
5515 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5516 stmt, gsi);
5517 op = vec_oprnd0
5518 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5519 }
5520 else
5521 gcc_unreachable ();
5522 }
5523 else
5524 {
5525 src = vec_oprnd1
5526 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5527 op = vec_oprnd0
5528 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5529 }
5530
5531 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5532 {
5533 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5534 == TYPE_VECTOR_SUBPARTS (srctype));
0e22bb5a 5535 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5536 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5537 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5538 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5539 src = var;
5540 }
5541
5542 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5543 {
5544 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5545 == TYPE_VECTOR_SUBPARTS (idxtype));
0e22bb5a 5546 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5547 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5548 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5550 op = var;
5551 }
5552
5553 new_stmt
5554 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5555
5556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5557
5558 if (prev_stmt_info == NULL)
5559 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5560 else
5561 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5562 prev_stmt_info = vinfo_for_stmt (new_stmt);
5563 }
5564 return true;
5565 }
5566
0d0293ac 5567 if (grouped_store)
5568 {
5569 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
e14c1050 5570 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
ebfd146a 5571
e14c1050 5572 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5573
5574 /* FORNOW */
a70d6342 5575 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5576
5577 /* We vectorize all the stmts of the interleaving group when we
5578 reach the last stmt in the group. */
5579 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5580 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5581 && !slp)
5582 {
5583 *vec_stmt = NULL;
5584 return true;
5585 }
5586
5587 if (slp)
4b5caab7 5588 {
0d0293ac 5589 grouped_store = false;
5590 /* VEC_NUM is the number of vect stmts to be created for this
5591 group. */
5592 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9771b263 5593 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
52eab378 5594 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
4b5caab7 5595 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
d092494c 5596 op = gimple_assign_rhs1 (first_stmt);
4b5caab7 5597 }
ebfd146a 5598 else
5599 /* VEC_NUM is the number of vect stmts to be created for this
5600 group. */
5601 vec_num = group_size;
5602 }
b8698a0f 5603 else
5604 {
5605 first_stmt = stmt;
5606 first_dr = dr;
5607 group_size = vec_num = 1;
ebfd146a 5608 }
b8698a0f 5609
73fbfcad 5610 if (dump_enabled_p ())
78c60e3d 5611 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5612 "transform store. ncopies = %d\n", ncopies);
ebfd146a 5613
5614 if (STMT_VINFO_STRIDED_P (stmt_info))
5615 {
5616 gimple_stmt_iterator incr_gsi;
5617 bool insert_after;
355fe088 5618 gimple *incr;
5619 tree offvar;
5620 tree ivstep;
5621 tree running_off;
5622 gimple_seq stmts = NULL;
5623 tree stride_base, stride_step, alias_off;
5624 tree vec_oprnd;
f502d50e 5625 unsigned int g;
5626
5627 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5628
5629 stride_base
5630 = fold_build_pointer_plus
f502d50e 5631 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
f2e2a985 5632 size_binop (PLUS_EXPR,
5633 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
 5634 convert_to_ptrofftype (DR_INIT (first_dr))));
5635 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5636
5637 /* For a store with loop-invariant (but other than power-of-2)
5638 stride (i.e. not a grouped access) like so:
5639
5640 for (i = 0; i < n; i += stride)
5641 array[i] = ...;
5642
5643 we generate a new induction variable and new stores from
5644 the components of the (vectorized) rhs:
5645
5646 for (j = 0; ; j += VF*stride)
5647 vectemp = ...;
5648 tmp1 = vectemp[0];
5649 array[j] = tmp1;
5650 tmp2 = vectemp[1];
5651 array[j + stride] = tmp2;
5652 ...
5653 */
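   /* A concrete instance (illustrative): with V4SI vectors and a
      scalar stride of 3 ints, nunits == 4 scalar stores are emitted
      per copy, at offsets 0, 3, 6 and 9 ints from the running
      pointer, and the induction variable created below advances by
      ncopies * nstores * stride_step per vector iteration.  */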
5654
5655 unsigned nstores = nunits;
5656 tree ltype = elem_type;
5657 if (slp)
5658 {
5659 nstores = nunits / group_size;
5660 if (group_size < nunits)
5661 ltype = build_vector_type (elem_type, group_size);
5662 else
5663 ltype = vectype;
5664 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5665 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
f502d50e 5666 group_size = 1;
5667 }
5668
5669 ivstep = stride_step;
5670 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5671 build_int_cst (TREE_TYPE (ivstep),
cee62fee 5672 ncopies * nstores));
5673
5674 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5675
5676 create_iv (stride_base, ivstep, NULL,
5677 loop, &incr_gsi, insert_after,
5678 &offvar, NULL);
5679 incr = gsi_stmt (incr_gsi);
310213d4 5680 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5681
5682 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5683 if (stmts)
5684 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5685
5686 prev_stmt_info = NULL;
5687 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5688 next_stmt = first_stmt;
5689 for (g = 0; g < group_size; g++)
f2e2a985 5690 {
5691 running_off = offvar;
5692 if (g)
f2e2a985 5693 {
5694 tree size = TYPE_SIZE_UNIT (ltype);
5695 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
f2e2a985 5696 size);
f502d50e 5697 tree newoff = copy_ssa_name (running_off, NULL);
f2e2a985 5698 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
f502d50e 5699 running_off, pos);
f2e2a985 5700 vect_finish_stmt_generation (stmt, incr, gsi);
f2e2a985 5701 running_off = newoff;
5702 }
5703 for (j = 0; j < ncopies; j++)
5704 {
5705 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5706 and first_stmt == stmt. */
5707 if (j == 0)
5708 {
5709 if (slp)
5710 {
5711 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5712 slp_node, -1);
5713 vec_oprnd = vec_oprnds[0];
5714 }
5715 else
5716 {
5717 gcc_assert (gimple_assign_single_p (next_stmt));
5718 op = gimple_assign_rhs1 (next_stmt);
81c40241 5719 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5720 }
5721 }
f2e2a985 5722 else
5723 {
5724 if (slp)
5725 vec_oprnd = vec_oprnds[j];
5726 else
c079cbac 5727 {
81c40241 5728 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5729 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5730 }
5731 }
5732
5733 for (i = 0; i < nstores; i++)
5734 {
5735 tree newref, newoff;
355fe088 5736 gimple *incr, *assign;
5737 tree size = TYPE_SIZE (ltype);
5738 /* Extract the i'th component. */
5739 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5740 bitsize_int (i), size);
5741 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5742 size, pos);
5743
5744 elem = force_gimple_operand_gsi (gsi, elem, true,
5745 NULL_TREE, true,
5746 GSI_SAME_STMT);
5747
5748 newref = build2 (MEM_REF, ltype,
5749 running_off, alias_off);
5750
5751 /* And store it to *running_off. */
5752 assign = gimple_build_assign (newref, elem);
5753 vect_finish_stmt_generation (stmt, assign, gsi);
5754
5755 newoff = copy_ssa_name (running_off, NULL);
5756 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5757 running_off, stride_step);
5758 vect_finish_stmt_generation (stmt, incr, gsi);
5759
5760 running_off = newoff;
5761 if (g == group_size - 1
5762 && !slp)
f502d50e
MM
5763 {
5764 if (j == 0 && i == 0)
5765 STMT_VINFO_VEC_STMT (stmt_info)
5766 = *vec_stmt = assign;
5767 else
5768 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5769 prev_stmt_info = vinfo_for_stmt (assign);
5770 }
5771 }
f2e2a985 5772 }
f502d50e 5773 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5774 }
5775 return true;
5776 }
5777
5778 dr_chain.create (group_size);
5779 oprnds.create (group_size);
ebfd146a 5780
720f5239 5781 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
ebfd146a 5782 gcc_assert (alignment_support_scheme);
5783 /* Targets with store-lane instructions must not require explicit
5784 realignment. */
5785 gcc_assert (!store_lanes_p
5786 || alignment_support_scheme == dr_aligned
5787 || alignment_support_scheme == dr_unaligned_supported);
5788
5789 if (negative)
5790 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5791
5792 if (store_lanes_p)
5793 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5794 else
5795 aggr_type = vectype;
5796
5797 /* In case the vectorization factor (VF) is bigger than the number
5798 of elements that we can fit in a vectype (nunits), we have to generate
 5799 more than one vector stmt - i.e. - we need to "unroll" the
b8698a0f 5800 vector stmt by a factor VF/nunits. For more details see documentation in
5801 vect_get_vec_def_for_copy_stmt. */
5802
0d0293ac 5803 /* In case of interleaving (non-unit grouped access):
ebfd146a
IR
5804
5805 S1: &base + 2 = x2
5806 S2: &base = x0
5807 S3: &base + 1 = x1
5808 S4: &base + 3 = x3
5809
 5810 We create vectorized stores starting from the base address (the access of the
5811 first stmt in the chain (S2 in the above example), when the last store stmt
5812 of the chain (S4) is reached:
5813
5814 VS1: &base = vx2
5815 VS2: &base + vec_size*1 = vx0
5816 VS3: &base + vec_size*2 = vx1
5817 VS4: &base + vec_size*3 = vx3
5818
5819 Then permutation statements are generated:
5820
5821 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5822 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
ebfd146a 5823 ...
b8698a0f 5824
5825 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5826 (the order of the data-refs in the output of vect_permute_store_chain
5827 corresponds to the order of scalar stmts in the interleaving chain - see
5828 the documentation of vect_permute_store_chain()).
5829
5830 In case of both multiple types and interleaving, above vector stores and
ff802fa1 5831 permutation stmts are created for every copy. The result vector stmts are
ebfd146a 5832 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 5833 STMT_VINFO_RELATED_STMT for the next copies.
5834 */
5835
5836 prev_stmt_info = NULL;
5837 for (j = 0; j < ncopies; j++)
5838 {
5839
5840 if (j == 0)
5841 {
5842 if (slp)
5843 {
5844 /* Get vectorized arguments for SLP_NODE. */
5845 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5846 NULL, slp_node, -1);
ebfd146a 5847
9771b263 5848 vec_oprnd = vec_oprnds[0];
5849 }
5850 else
5851 {
5852 /* For interleaved stores we collect vectorized defs for all the
5853 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5854 used as an input to vect_permute_store_chain(), and OPRNDS as
5855 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5856
0d0293ac 5857 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
ebfd146a 5858 OPRNDS are of size 1. */
b8698a0f 5859 next_stmt = first_stmt;
5860 for (i = 0; i < group_size; i++)
5861 {
5862 /* Since gaps are not supported for interleaved stores,
5863 GROUP_SIZE is the exact number of stmts in the chain.
5864 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5865 there is no interleaving, GROUP_SIZE is 1, and only one
5866 iteration of the loop will be executed. */
5867 gcc_assert (next_stmt
5868 && gimple_assign_single_p (next_stmt));
5869 op = gimple_assign_rhs1 (next_stmt);
5870
81c40241 5871 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5872 dr_chain.quick_push (vec_oprnd);
5873 oprnds.quick_push (vec_oprnd);
e14c1050 5874 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5875 }
5876 }
5877
 5878 /* We should have caught mismatched types earlier. */
5879 gcc_assert (useless_type_conversion_p (vectype,
5880 TREE_TYPE (vec_oprnd)));
5881 bool simd_lane_access_p
5882 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5883 if (simd_lane_access_p
5884 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5885 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5886 && integer_zerop (DR_OFFSET (first_dr))
5887 && integer_zerop (DR_INIT (first_dr))
5888 && alias_sets_conflict_p (get_alias_set (aggr_type),
5889 get_alias_set (DR_REF (first_dr))))
5890 {
5891 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5892 dataref_offset = build_int_cst (reference_alias_ptr_type
5893 (DR_REF (first_dr)), 0);
8928eff3 5894 inv_p = false;
5895 }
5896 else
5897 dataref_ptr
5898 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5899 simd_lane_access_p ? loop : NULL,
09dfa495 5900 offset, &dummy, gsi, &ptr_incr,
74bf76ed 5901 simd_lane_access_p, &inv_p);
a70d6342 5902 gcc_assert (bb_vinfo || !inv_p);
ebfd146a 5903 }
b8698a0f 5904 else
ebfd146a 5905 {
5906 /* For interleaved stores we created vectorized defs for all the
5907 defs stored in OPRNDS in the previous iteration (previous copy).
5908 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5909 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5910 next copy.
0d0293ac 5911 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5912 OPRNDS are of size 1. */
5913 for (i = 0; i < group_size; i++)
5914 {
9771b263 5915 op = oprnds[i];
81c40241 5916 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
b8698a0f 5917 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5918 dr_chain[i] = vec_oprnd;
5919 oprnds[i] = vec_oprnd;
ebfd146a 5920 }
5921 if (dataref_offset)
5922 dataref_offset
5923 = int_const_binop (PLUS_EXPR, dataref_offset,
5924 TYPE_SIZE_UNIT (aggr_type));
5925 else
5926 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5927 TYPE_SIZE_UNIT (aggr_type));
5928 }
5929
272c6793 5930 if (store_lanes_p)
ebfd146a 5931 {
272c6793 5932 tree vec_array;
267d3070 5933
5934 /* Combine all the vectors into an array. */
5935 vec_array = create_vector_array (vectype, vec_num);
5936 for (i = 0; i < vec_num; i++)
c2d7ab2a 5937 {
9771b263 5938 vec_oprnd = dr_chain[i];
272c6793 5939 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
267d3070 5940 }
b8698a0f 5941
5942 /* Emit:
5943 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5944 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5945 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5946 gimple_call_set_lhs (new_stmt, data_ref);
267d3070 5947 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5948 }
5949 else
5950 {
5951 new_stmt = NULL;
0d0293ac 5952 if (grouped_store)
272c6793 5953 {
5954 if (j == 0)
5955 result_chain.create (group_size);
5956 /* Permute. */
5957 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5958 &result_chain);
5959 }
c2d7ab2a 5960
5961 next_stmt = first_stmt;
5962 for (i = 0; i < vec_num; i++)
5963 {
644ffefd 5964 unsigned align, misalign;
5965
5966 if (i > 0)
5967 /* Bump the vector pointer. */
5968 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5969 stmt, NULL_TREE);
5970
5971 if (slp)
9771b263 5972 vec_oprnd = vec_oprnds[i];
5973 else if (grouped_store)
5974 /* For grouped stores vectorized defs are interleaved in
272c6793 5975 vect_permute_store_chain(). */
9771b263 5976 vec_oprnd = result_chain[i];
272c6793 5977
5978 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5979 dataref_ptr,
5980 dataref_offset
5981 ? dataref_offset
5982 : build_int_cst (reference_alias_ptr_type
5983 (DR_REF (first_dr)), 0));
644ffefd 5984 align = TYPE_ALIGN_UNIT (vectype);
272c6793 5985 if (aligned_access_p (first_dr))
644ffefd 5986 misalign = 0;
5987 else if (DR_MISALIGNMENT (first_dr) == -1)
5988 {
5989 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5990 align = TYPE_ALIGN_UNIT (elem_type);
5991 else
5992 align = get_object_alignment (DR_REF (first_dr))
5993 / BITS_PER_UNIT;
5994 misalign = 0;
5995 TREE_TYPE (data_ref)
5996 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 5997 align * BITS_PER_UNIT);
5998 }
5999 else
6000 {
6001 TREE_TYPE (data_ref)
6002 = build_aligned_type (TREE_TYPE (data_ref),
6003 TYPE_ALIGN (elem_type));
644ffefd 6004 misalign = DR_MISALIGNMENT (first_dr);
272c6793 6005 }
6006 if (dataref_offset == NULL_TREE
6007 && TREE_CODE (dataref_ptr) == SSA_NAME)
6008 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6009 misalign);
c2d7ab2a 6010
6011 if (negative
6012 && dt != vect_constant_def
6013 && dt != vect_external_def)
6014 {
6015 tree perm_mask = perm_mask_for_reverse (vectype);
6016 tree perm_dest
6017 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6018 vectype);
b731b390 6019 tree new_temp = make_ssa_name (perm_dest);
6020
6021 /* Generate the permute statement. */
355fe088 6022 gimple *perm_stmt
6023 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6024 vec_oprnd, perm_mask);
6025 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6026
6027 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6028 vec_oprnd = new_temp;
6029 }
6030
6031 /* Arguments are ready. Create the new vector stmt. */
6032 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6034
6035 if (slp)
6036 continue;
6037
e14c1050 6038 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6039 if (!next_stmt)
6040 break;
6041 }
ebfd146a 6042 }
6043 if (!slp)
6044 {
6045 if (j == 0)
6046 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6047 else
6048 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6049 prev_stmt_info = vinfo_for_stmt (new_stmt);
6050 }
6051 }
6052
6053 dr_chain.release ();
6054 oprnds.release ();
6055 result_chain.release ();
6056 vec_oprnds.release ();
6057
6058 return true;
6059}
6060
6061/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6062 VECTOR_CST mask. No checks are made that the target platform supports the
6063 mask, so callers may wish to test can_vec_perm_p separately, or use
6064 vect_gen_perm_mask_checked. */
a1e53f3f 6065
3fcc1b55 6066tree
557be5a8 6067vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
a1e53f3f 6068{
d2a12ae7 6069 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
2635892a 6070 int i, nunits;
a1e53f3f 6071
22e4dee7 6072 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 6073
6074 mask_elt_type = lang_hooks.types.type_for_mode
6075 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
22e4dee7 6076 mask_type = get_vectype_for_scalar_type (mask_elt_type);
a1e53f3f 6077
d2a12ae7 6078 mask_elts = XALLOCAVEC (tree, nunits);
aec7ae7d 6079 for (i = nunits - 1; i >= 0; i--)
6080 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6081 mask_vec = build_vector (mask_type, mask_elts);
a1e53f3f 6082
2635892a 6083 return mask_vec;
6084}
6085
6086/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6087 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6088
6089tree
6090vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6091{
6092 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6093 return vect_gen_perm_mask_any (vectype, sel);
6094}
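/* Usage sketch (illustrative): a caller wanting the element-reversing
   mask for V4SI fills SEL with {3, 2, 1, 0}, verifies
   can_vec_perm_p (TYPE_MODE (vectype), false, sel) and then calls
   vect_gen_perm_mask_checked (vectype, sel), exactly as
   perm_mask_for_reverse does above.  */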
6095
 6096/* Given vector variables X and Y that were generated for the scalar
 6097 STMT, generate instructions to permute the vector elements of X and Y
6098 using permutation mask MASK_VEC, insert them at *GSI and return the
6099 permuted vector variable. */
6100
6101static tree
355fe088 6102permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
aec7ae7d 6103 gimple_stmt_iterator *gsi)
6104{
6105 tree vectype = TREE_TYPE (x);
aec7ae7d 6106 tree perm_dest, data_ref;
355fe088 6107 gimple *perm_stmt;
a1e53f3f 6108
acdcd61b 6109 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
b731b390 6110 data_ref = make_ssa_name (perm_dest);
6111
6112 /* Generate the permute statement. */
0d0e4a03 6113 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6114 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6115
6116 return data_ref;
6117}
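/* E.g. on V8HI operands, calling permute_vec_elements with the mask
   {0, 8, 1, 9, 2, 10, 3, 11} emits

     data_ref = VEC_PERM_EXPR <x, y, {0, 8, 1, 9, 2, 10, 3, 11}>;

   i.e. it interleaves the low halves of X and Y and returns the new
   SSA name.  */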
6118
6119/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 6120 inserting them on the loop's preheader edge. Returns true if we
6121 were successful in doing so (and thus STMT can be moved then),
6122 otherwise returns false. */
6123
6124static bool
355fe088 6125hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6126{
6127 ssa_op_iter i;
6128 tree op;
6129 bool any = false;
6130
6131 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6132 {
355fe088 6133 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6134 if (!gimple_nop_p (def_stmt)
6135 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6136 {
 6137 /* Make sure we don't need to recurse. While we could do
 6138 so in simple cases, when there are more complex use webs
 6139 we don't have an easy way to preserve stmt order to fulfil
 6140 dependencies within them. */
6141 tree op2;
6142 ssa_op_iter i2;
6143 if (gimple_code (def_stmt) == GIMPLE_PHI)
6144 return false;
6145 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6146 {
355fe088 6147 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6148 if (!gimple_nop_p (def_stmt2)
6149 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6150 return false;
6151 }
6152 any = true;
6153 }
6154 }
6155
6156 if (!any)
6157 return true;
6158
6159 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6160 {
355fe088 6161 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6162 if (!gimple_nop_p (def_stmt)
6163 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6164 {
6165 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6166 gsi_remove (&gsi, false);
6167 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6168 }
6169 }
6170
6171 return true;
6172}
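/* For instance (illustrative), in

     loop:
       a_2 = invariant_1 + 4;
       x_3 = *a_2;              <-- STMT

   the definition of a_2 is moved to the preheader so that STMT itself
   becomes movable.  The function gives up when a use is defined by a
   PHI or when a definition in turn depends on another stmt inside
   LOOP, since that would require recursive hoisting.  */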
6173
6174/* vectorizable_load.
6175
 6176 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6177 can be vectorized.
6178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
6179 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6181
6182static bool
355fe088 6183vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
c716e67f 6184 slp_tree slp_node, slp_instance slp_node_instance)
6185{
6186 tree scalar_dest;
6187 tree vec_dest = NULL;
6188 tree data_ref = NULL;
6189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b8698a0f 6190 stmt_vec_info prev_stmt_info;
ebfd146a 6191 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 6192 struct loop *loop = NULL;
ebfd146a 6193 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
a70d6342 6194 bool nested_in_vect_loop = false;
c716e67f 6195 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
272c6793 6196 tree elem_type;
ebfd146a 6197 tree new_temp;
ef4bddc2 6198 machine_mode mode;
355fe088 6199 gimple *new_stmt = NULL;
6200 tree dummy;
6201 enum dr_alignment_support alignment_support_scheme;
6202 tree dataref_ptr = NULL_TREE;
74bf76ed 6203 tree dataref_offset = NULL_TREE;
355fe088 6204 gimple *ptr_incr = NULL;
ebfd146a 6205 int ncopies;
9b999e8c 6206 int i, j, group_size = -1, group_gap_adj;
ebfd146a
IR
6207 tree msq = NULL_TREE, lsq;
6208 tree offset = NULL_TREE;
356bbc4c 6209 tree byte_offset = NULL_TREE;
ebfd146a 6210 tree realignment_token = NULL_TREE;
538dd0b7 6211 gphi *phi = NULL;
6e1aa848 6212 vec<tree> dr_chain = vNULL;
0d0293ac 6213 bool grouped_load = false;
272c6793 6214 bool load_lanes_p = false;
355fe088 6215 gimple *first_stmt;
4f0a0218 6216 gimple *first_stmt_for_drptr = NULL;
ebfd146a 6217 bool inv_p;
319e6439 6218 bool negative = false;
6219 bool compute_in_loop = false;
6220 struct loop *at_loop;
6221 int vec_num;
6222 bool slp = (slp_node != NULL);
6223 bool slp_perm = false;
6224 enum tree_code code;
6225 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6226 int vf;
272c6793 6227 tree aggr_type;
6228 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6229 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6230 int gather_scale = 1;
6231 enum vect_def_type gather_dt = vect_unknown_def_type;
310213d4 6232 vec_info *vinfo = stmt_info->vinfo;
a70d6342 6233
6234 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6235 return false;
6236
6237 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6238 && ! vec_stmt)
6239 return false;
6240
6241 /* Is vectorizable load? */
6242 if (!is_gimple_assign (stmt))
6243 return false;
6244
6245 scalar_dest = gimple_assign_lhs (stmt);
6246 if (TREE_CODE (scalar_dest) != SSA_NAME)
6247 return false;
6248
6249 code = gimple_assign_rhs_code (stmt);
6250 if (code != ARRAY_REF
6251 && code != BIT_FIELD_REF
6252 && code != INDIRECT_REF
6253 && code != COMPONENT_REF
6254 && code != IMAGPART_EXPR
6255 && code != REALPART_EXPR
6256 && code != MEM_REF
6257 && TREE_CODE_CLASS (code) != tcc_declaration)
6258 return false;
6259
6260 if (!STMT_VINFO_DATA_REF (stmt_info))
6261 return false;
6262
6263 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6264 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6265
6266 if (loop_vinfo)
6267 {
6268 loop = LOOP_VINFO_LOOP (loop_vinfo);
6269 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6270 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6271 }
6272 else
3533e503 6273 vf = 1;
6274
6275 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 6276 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a 6277 case of SLP. */
fce57248 6278 if (slp)
6279 ncopies = 1;
6280 else
6281 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6282
6283 gcc_assert (ncopies >= 1);
6284
6285 /* FORNOW. This restriction should be relaxed. */
6286 if (nested_in_vect_loop && ncopies > 1)
6287 {
73fbfcad 6288 if (dump_enabled_p ())
78c60e3d 6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6290 "multiple types in nested loop.\n");
6291 return false;
6292 }
6293
6294 /* Invalidate assumptions made by dependence analysis when vectorization
6295 on the unrolled body effectively re-orders stmts. */
6296 if (ncopies > 1
6297 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6298 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6299 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6300 {
6301 if (dump_enabled_p ())
6302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6303 "cannot perform implicit CSE when unrolling "
6304 "with negative dependence distance\n");
6305 return false;
6306 }
6307
7b7b1813 6308 elem_type = TREE_TYPE (vectype);
947131ba 6309 mode = TYPE_MODE (vectype);
6310
6311 /* FORNOW. In some cases can vectorize even if data-type not supported
6312 (e.g. - data copies). */
947131ba 6313 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
ebfd146a 6314 {
73fbfcad 6315 if (dump_enabled_p ())
78c60e3d 6316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 6317 "Aligned load, but unsupported type.\n");
6318 return false;
6319 }
6320
ebfd146a 6321 /* Check if the load is a part of an interleaving chain. */
0d0293ac 6322 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 6323 {
0d0293ac 6324 grouped_load = true;
ebfd146a 6325 /* FORNOW */
3bab6342 6326 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
ebfd146a 6327
e14c1050 6328 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6329 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6330
fce57248 6331 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
6332 {
6333 if (vect_load_lanes_supported (vectype, group_size))
6334 load_lanes_p = true;
6335 else if (!vect_grouped_load_supported (vectype, group_size))
6336 return false;
6337 }
6338
6339 /* If this is single-element interleaving with an element distance
6340 that leaves unused vector loads around punt - we at least create
6341 very sub-optimal code in that case (and blow up memory,
6342 see PR65518). */
6343 if (first_stmt == stmt
6344 && !GROUP_NEXT_ELEMENT (stmt_info))
6345 {
6346 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6347 {
6348 if (dump_enabled_p ())
6349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6350 "single-element interleaving not supported "
6351 "for not adjacent vector loads\n");
6352 return false;
6353 }
6354
6355 /* Single-element interleaving requires peeling for gaps. */
836dbb1a 6356 gcc_assert (GROUP_GAP (stmt_info));
6357 }
6358
 6359 /* If there is a gap at the end of the group or the group size cannot
6360 be made a multiple of the vector element count then we access excess
6361 elements in the last iteration and thus need to peel that off. */
6362 if (loop_vinfo
6363 && ! STMT_VINFO_STRIDED_P (stmt_info)
836dbb1a 6364 && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
d3465d72 6365 || (!slp && !load_lanes_p && vf % group_size != 0)))
6366 {
6367 if (dump_enabled_p ())
6368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6369 "Data access with gaps requires scalar "
6370 "epilogue loop\n");
6371 if (loop->inner)
6372 {
6373 if (dump_enabled_p ())
6374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6375 "Peeling for outer loop is not supported\n");
6376 return false;
6377 }
6378
6379 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6380 }
6381
6382 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6383 slp_perm = true;
6384
6385 /* ??? The following is overly pessimistic (as well as the loop
6386 case above) in the case we can statically determine the excess
6387 elements loaded are within the bounds of a decl that is accessed.
6388 Likewise for BB vectorizations using masked loads is a possibility. */
6389 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6390 {
6391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6392 "BB vectorization with gaps at the end of a load "
6393 "is not supported\n");
6394 return false;
6395 }
6396
6397 /* Invalidate assumptions made by dependence analysis when vectorization
6398 on the unrolled body effectively re-orders stmts. */
6399 if (!PURE_SLP_STMT (stmt_info)
6400 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6401 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6402 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6403 {
6404 if (dump_enabled_p ())
6405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6406 "cannot perform implicit CSE when performing "
6407 "group loads with negative dependence distance\n");
6408 return false;
6409 }
96bb56b2
RB
6410
6411 /* Similarly when the stmt is a load that is both part of a SLP
6412 instance and a loop vectorized stmt via the same-dr mechanism
6413 we have to give up. */
6414 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6415 && (STMT_SLP_TYPE (stmt_info)
6416 != STMT_SLP_TYPE (vinfo_for_stmt
6417 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6418 {
6419 if (dump_enabled_p ())
6420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6421 "conflicting SLP types for CSEd load\n");
6422 return false;
6423 }
ebfd146a
IR
6424 }
6425
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      if (grouped_load
          && slp
          && (group_size > nunits
              || nunits % group_size != 0))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unhandled strided group load\n");
          return false;
        }
    }
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
                              NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
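
      /* E.g. (modes illustrative): with V4DI data and V8SI offsets (WIDEN)
         the mask built above is { 4, 5, 6, 7, 4, 5, 6, 7 }, used on
         odd-numbered copies to shift the high half of the offset vector
         into place; with V8SI data and V4DI offsets (NARROW) it is
         { 0, 1, 2, 3, 8, 9, 10, 11 }, concatenating the low halves of
         two consecutive gather results.  */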

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        mask = build_int_cst (masktype, -1);
      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        {
          mask = build_int_cst (TREE_TYPE (masktype), -1);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else
        gcc_unreachable ();

      scale = build_int_cst (scaletype, gather_scale);

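      /* MERGE supplies the result for masked-off elements; with the
         all-ones mask above it is never actually used, so a zero vector
         of the builtin's return type is as good as any other value.  */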
      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
        merge = build_int_cst (TREE_TYPE (rettype), 0);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = 0;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
          merge = build_real (TREE_TYPE (rettype), r);
        }
      else
        gcc_unreachable ();
      merge = build_vector_from_val (rettype, merge);
      merge = vect_init_vector (stmt, merge, rettype, NULL);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

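          /* For NARROW, two consecutive gather results are needed to fill
             one vector of VECTYPE: even-numbered copies only stash their
             result in PREV_RES and the following odd-numbered copy
             concatenates the two halves with PERM_MASK.  */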
          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      if (slp && grouped_load)
        first_dr = STMT_VINFO_DATA_REF
            (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
      else
        first_dr = dr;

      stride_base
        = fold_build_pointer_plus
            (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (DR_OFFSET (first_dr)),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
       */
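      /* For instance, with VF == 4 and a single vector per copy this reads

           for (j = 0; ; j += 4*stride)
             vectemp = {array[j], array[j + stride],
                        array[j + 2*stride], array[j + 3*stride]};  */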

      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
                            build_int_cst (TREE_TYPE (stride_step), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      stride_step = force_gimple_operand (unshare_expr (stride_step),
                                          &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
      int nloads = nunits;
      tree ltype = TREE_TYPE (vectype);
      auto_vec<tree> dr_chain;
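      /* By default a vector is assembled from NUNITS scalar loads.  Under
         SLP each scalar iteration contributes GROUP_SIZE consecutive
         elements, so nunits / group_size loads of a GROUP_SIZE-element
         sub-vector suffice; e.g. (values illustrative) nunits == 4 and
         group_size == 2 gives two two-element loads per vector stmt.  */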
      if (slp)
        {
          nloads = nunits / group_size;
          if (group_size < nunits)
            ltype = build_vector_type (TREE_TYPE (vectype), group_size);
          else
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
          if (slp_perm)
            {
              ncopies = (group_size * vf + nunits - 1) / nunits;
              dr_chain.create (ncopies);
            }
          else
            ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          if (nloads > 1)
            {
              vec_alloc (v, nloads);
              for (i = 0; i < nloads; i++)
                {
                  tree newref, newoff;
                  gimple *incr;
                  newref = build2 (MEM_REF, ltype, running_off, alias_off);

                  newref = force_gimple_operand_gsi (gsi, newref, true,
                                                     NULL_TREE, true,
                                                     GSI_SAME_STMT);
                  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
                  newoff = copy_ssa_name (running_off);
                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                              running_off, stride_step);
                  vect_finish_stmt_generation (stmt, incr, gsi);

                  running_off = newoff;
                }

              vec_inv = build_constructor (vectype, v);
              new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
              new_stmt = SSA_NAME_DEF_STMT (new_temp);
            }
          else
            {
              new_stmt = gimple_build_assign (make_ssa_name (ltype),
                                              build2 (MEM_REF, ltype,
                                                      running_off, alias_off));
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              tree newoff = copy_ssa_name (running_off);
              gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                  running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          if (slp)
            {
              if (slp_perm)
                dr_chain.quick_push (gimple_assign_lhs (new_stmt));
              else
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      if (slp_perm)
        vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                      slp_node_instance, false);
      return true;
    }

  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      /* For SLP vectorization we directly vectorize a subchain
         without permutation.  */
      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
      /* For BB vectorization always use the first stmt to base
         the data ref pointer on.  */
      if (bb_vinfo)
        first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ??? But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ??? With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      group_gap_adj = 0;

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
          if (slp_perm)
            vec_num = (group_size * vf + nunits - 1) / nunits;
          else
            vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          group_gap_adj = vf * group_size - nunits * vec_num;
        }
      else
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap_adj = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

       S1:  x2 = &base + 2
       S2:  x0 = &base
       S3:  x1 = &base + 1
       S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

       VS1: vx0 = &base
       VS2: vx1 = &base + vec_size*1
       VS3: vx3 = &base + vec_size*2
       VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

       VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
       VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load ().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
          byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
                                    size_one_node);
        }
    }
  else
    at_loop = loop;

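  /* For a negative step the vector access ends at the group's highest
     address, so start the data-ref pointer nunits - 1 elements back;
     the loaded vector is then reversed (see perm_mask_for_reverse
     below).  */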
  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

ebfd146a
IR
7033 prev_stmt_info = NULL;
7034 for (j = 0; j < ncopies; j++)
b8698a0f 7035 {
272c6793 7036 /* 1. Create the vector or array pointer update chain. */
ebfd146a 7037 if (j == 0)
74bf76ed
JJ
7038 {
7039 bool simd_lane_access_p
7040 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7041 if (simd_lane_access_p
7042 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7043 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7044 && integer_zerop (DR_OFFSET (first_dr))
7045 && integer_zerop (DR_INIT (first_dr))
7046 && alias_sets_conflict_p (get_alias_set (aggr_type),
7047 get_alias_set (DR_REF (first_dr)))
7048 && (alignment_support_scheme == dr_aligned
7049 || alignment_support_scheme == dr_unaligned_supported))
7050 {
7051 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7052 dataref_offset = build_int_cst (reference_alias_ptr_type
7053 (DR_REF (first_dr)), 0);
8928eff3 7054 inv_p = false;
74bf76ed 7055 }
4f0a0218
RB
7056 else if (first_stmt_for_drptr
7057 && first_stmt != first_stmt_for_drptr)
7058 {
7059 dataref_ptr
7060 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7061 at_loop, offset, &dummy, gsi,
7062 &ptr_incr, simd_lane_access_p,
7063 &inv_p, byte_offset);
7064 /* Adjust the pointer by the difference to first_stmt. */
7065 data_reference_p ptrdr
7066 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7067 tree diff = fold_convert (sizetype,
7068 size_binop (MINUS_EXPR,
7069 DR_INIT (first_dr),
7070 DR_INIT (ptrdr)));
7071 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7072 stmt, diff);
7073 }
74bf76ed
JJ
7074 else
7075 dataref_ptr
7076 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7077 offset, &dummy, gsi, &ptr_incr,
356bbc4c
JJ
7078 simd_lane_access_p, &inv_p,
7079 byte_offset);
74bf76ed
JJ
7080 }
7081 else if (dataref_offset)
7082 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7083 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7084 else
272c6793
RS
7085 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7086 TYPE_SIZE_UNIT (aggr_type));
ebfd146a 7087
0d0293ac 7088 if (grouped_load || slp_perm)
9771b263 7089 dr_chain.create (vec_num);
5ce1ee7f 7090
272c6793 7091 if (load_lanes_p)
ebfd146a 7092 {
272c6793
RS
7093 tree vec_array;
7094
7095 vec_array = create_vector_array (vectype, vec_num);
7096
7097 /* Emit:
7098 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7099 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7100 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7101 gimple_call_set_lhs (new_stmt, vec_array);
7102 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a 7103
272c6793
RS
7104 /* Extract each vector into an SSA_NAME. */
7105 for (i = 0; i < vec_num; i++)
ebfd146a 7106 {
272c6793
RS
7107 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7108 vec_array, i);
9771b263 7109 dr_chain.quick_push (new_temp);
272c6793
RS
7110 }
7111
7112 /* Record the mapping between SSA_NAMEs and statements. */
0d0293ac 7113 vect_record_grouped_load_vectors (stmt, dr_chain);
272c6793
RS
7114 }
7115 else
7116 {
7117 for (i = 0; i < vec_num; i++)
7118 {
7119 if (i > 0)
7120 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7121 stmt, NULL_TREE);
7122
7123 /* 2. Create the vector-load in the loop. */
7124 switch (alignment_support_scheme)
7125 {
7126 case dr_aligned:
7127 case dr_unaligned_supported:
be1ac4ec 7128 {
644ffefd
MJ
7129 unsigned int align, misalign;
7130
272c6793 7131 data_ref
aed93b23
RB
7132 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7133 dataref_offset
7134 ? dataref_offset
7135 : build_int_cst (reference_alias_ptr_type
7136 (DR_REF (first_dr)), 0));
644ffefd 7137 align = TYPE_ALIGN_UNIT (vectype);
272c6793
RS
7138 if (alignment_support_scheme == dr_aligned)
7139 {
7140 gcc_assert (aligned_access_p (first_dr));
644ffefd 7141 misalign = 0;
272c6793
RS
7142 }
7143 else if (DR_MISALIGNMENT (first_dr) == -1)
7144 {
52639a61
RB
7145 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7146 align = TYPE_ALIGN_UNIT (elem_type);
7147 else
7148 align = (get_object_alignment (DR_REF (first_dr))
7149 / BITS_PER_UNIT);
7150 misalign = 0;
272c6793
RS
7151 TREE_TYPE (data_ref)
7152 = build_aligned_type (TREE_TYPE (data_ref),
52639a61 7153 align * BITS_PER_UNIT);
272c6793
RS
7154 }
7155 else
7156 {
7157 TREE_TYPE (data_ref)
7158 = build_aligned_type (TREE_TYPE (data_ref),
7159 TYPE_ALIGN (elem_type));
644ffefd 7160 misalign = DR_MISALIGNMENT (first_dr);
272c6793 7161 }
aed93b23
RB
7162 if (dataref_offset == NULL_TREE
7163 && TREE_CODE (dataref_ptr) == SSA_NAME)
74bf76ed
JJ
7164 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7165 align, misalign);
272c6793 7166 break;
be1ac4ec 7167 }
272c6793 7168 case dr_explicit_realign:
267d3070 7169 {
272c6793 7170 tree ptr, bump;
272c6793 7171
d88981fc 7172 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
272c6793
RS
7173
7174 if (compute_in_loop)
7175 msq = vect_setup_realignment (first_stmt, gsi,
7176 &realignment_token,
7177 dr_explicit_realign,
7178 dataref_ptr, NULL);
7179
aed93b23
RB
7180 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7181 ptr = copy_ssa_name (dataref_ptr);
7182 else
7183 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
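                    /* Round the address down to the vector alignment
                       boundary: ptr = dataref_ptr
                                       & -TYPE_ALIGN_UNIT (vectype).  */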
                    new_stmt = gimple_build_assign
                                 (ptr, BIT_AND_EXPR, dataref_ptr,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs,
                                       TYPE_SIZE_UNIT (elem_type));
                    bump = size_binop (MINUS_EXPR, bump, size_one_node);
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign
                                 (NULL_TREE, BIT_AND_EXPR, ptr,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = copy_ssa_name (ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  if (TREE_CODE (dataref_ptr) == SSA_NAME)
                    new_temp = copy_ssa_name (dataref_ptr);
                  else
                    new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
                  new_stmt = gimple_build_assign
                               (new_temp, BIT_AND_EXPR, dataref_ptr,
                                build_int_cst
                                (TREE_TYPE (dataref_ptr),
                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
                                                  msq, lsq, realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gcc_assert (!grouped_load);
                  /* If we have versioned for aliasing or the loop doesn't
                     have any data dependencies that would preclude this,
                     then we are sure this is a loop invariant load and
                     thus we can insert it on the preheader edge.  */
                  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
                      && !nested_in_vect_loop
                      && hoist_defs_of_uses (stmt, loop))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "hoisting out of the vectorized "
                                           "loop: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }
                      tree tem = copy_ssa_name (scalar_dest);
                      gsi_insert_on_edge_immediate
                        (loop_preheader_edge (loop),
                         gimple_build_assign (tem,
                                              unshare_expr
                                                (gimple_assign_rhs1 (stmt))));
                      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
                      set_vinfo_for_stmt (new_stmt,
                                          new_stmt_vec_info (new_stmt, vinfo));
                    }
                  else
                    {
                      gimple_stmt_iterator gsi2 = *gsi;
                      gsi_next (&gsi2);
                      new_temp = vect_init_vector (stmt, scalar_dest,
                                                   vectype, &gsi2);
                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
                    }
                }

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap or for excess
             elements loaded for a permuted SLP load.  */
          if (group_gap_adj != 0)
            {
              bool ovf;
              tree bump
                = wide_int_to_tree (sizetype,
                                    wi::smul (TYPE_SIZE_UNIT (elem_type),
                                              group_gap_adj, &ovf));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  /* Mask case: the condition is already a boolean SSA name rather than
     a comparison tree.  */
  if (TREE_CODE (cond) == SSA_NAME
      && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
      if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
                               &dt, comp_vectype)
          || !*comp_vectype
          || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
        return false;
      return true;
    }

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector
   variable to be used at REDUC_INDEX (in the then clause if REDUC_INDEX
   is 1, and in the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
                        gimple **vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt, dts[4];
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
        return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
          && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
               && reduc_def))
        return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "value used after loop.\n");
          return false;
        }
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
      || !comp_vectype)
    return false;

  gimple *def_stmt;
  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
                           &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
                           &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 4> ops;
              auto_vec<vec<tree>, 4> vec_defs;

              if (masked)
                ops.safe_push (cond_expr);
              else
                {
                  ops.safe_push (TREE_OPERAND (cond_expr, 0));
                  ops.safe_push (TREE_OPERAND (cond_expr, 1));
                }
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              if (!masked)
                vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              ops.release ();
              vec_defs.release ();
            }
          else
            {
              gimple *gtemp;
              if (masked)
                {
                  vec_cond_lhs
                    = vect_get_vec_def_for_operand (cond_expr, stmt,
                                                    comp_vectype);
                  vect_is_simple_use (cond_expr, stmt_info->vinfo,
                                      &gtemp, &dts[0]);
                }
              else
                {
                  vec_cond_lhs =
                    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                                  stmt, comp_vectype);
                  vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
                                      loop_vinfo, &gtemp, &dts[0]);

                  vec_cond_rhs =
                    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                                  stmt, comp_vectype);
                  vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
                                      loop_vinfo, &gtemp, &dts[1]);
                }
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt);
                  vect_is_simple_use (then_clause, loop_vinfo,
                                      &gtemp, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt);
                  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs
            = vect_get_vec_def_for_stmt_copy (dts[0],
                                              vec_oprnds0.pop ());
          if (!masked)
            vec_cond_rhs
              = vect_get_vec_def_for_stmt_copy (dts[1],
                                                vec_oprnds1.pop ());

          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          if (!masked)
            vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          if (masked)
            vec_compare = vec_cond_lhs;
          else
            {
              vec_cond_rhs = vec_oprnds1[i];
              vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                    vec_cond_lhs, vec_cond_rhs);
            }
          new_temp = make_ssa_name (vec_dest);
          new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
                                          vec_compare, vec_then_clause,
                                          vec_else_clause);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}

/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
                         gimple **vec_stmt, tree reduc_def,
                         slp_tree slp_node)
{
  tree lhs, rhs1, rhs2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned nunits;
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  gimple *def_stmt;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");
      return false;
    }

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
                           &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
                           &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
      if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
        return false;
    }
  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
      return expand_vec_cmp_expr_p (vectype, mask_type);
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 2> ops;
              auto_vec<vec<tree>, 2> vec_defs;

              ops.safe_push (rhs1);
              ops.safe_push (rhs2);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
            }
          else
            {
              vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
              vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
            }
        }
      else
        {
          vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
                                                     vec_oprnds0.pop ());
          vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
                                                     vec_oprnds1.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_rhs1);
          vec_oprnds1.quick_push (vec_rhs2);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
        {
          vec_rhs2 = vec_oprnds1[i];

          new_temp = make_ssa_name (mask);
          new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
ebfd146a 7900
8644a673 7901/* Make sure the statement is vectorizable. */
ebfd146a
IR
7902
7903bool
355fe088 7904vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 7905{
8644a673 7906 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 7907 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 7908 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 7909 bool ok;
a70d6342 7910 tree scalar_type, vectype;
355fe088 7911 gimple *pattern_stmt;
363477c0 7912 gimple_seq pattern_def_seq;
ebfd146a 7913
73fbfcad 7914 if (dump_enabled_p ())
ebfd146a 7915 {
78c60e3d
SS
7916 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7917 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 7918 }
ebfd146a 7919
1825a1f3 7920 if (gimple_has_volatile_ops (stmt))
b8698a0f 7921 {
73fbfcad 7922 if (dump_enabled_p ())
78c60e3d 7923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7924 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
7925
7926 return false;
7927 }
b8698a0f
L
7928
7929 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
7930 to include:
7931 - the COND_EXPR which is the loop exit condition
7932 - any LABEL_EXPRs in the loop
b8698a0f 7933 - computations that are used only for array indexing or loop control.
8644a673 7934 In basic blocks we only analyze statements that are a part of some SLP
83197f37 7935 instance, therefore, all the statements are relevant.
ebfd146a 7936
d092494c 7937 Pattern statement needs to be analyzed instead of the original statement
83197f37 7938 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
7939 statements. In basic blocks we are called from some SLP instance
7940 traversal, don't analyze pattern stmts instead, the pattern stmts
7941 already will be part of SLP instance. */
83197f37
IR
7942
7943 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 7944 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 7945 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 7946 {
9d5e7640 7947 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 7948 && pattern_stmt
9d5e7640
IR
7949 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7950 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7951 {
83197f37 7952 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
7953 stmt = pattern_stmt;
7954 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 7955 if (dump_enabled_p ())
9d5e7640 7956 {
78c60e3d
SS
7957 dump_printf_loc (MSG_NOTE, vect_location,
7958 "==> examining pattern statement: ");
7959 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
7960 }
7961 }
7962 else
7963 {
73fbfcad 7964 if (dump_enabled_p ())
e645e942 7965 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 7966
9d5e7640
IR
7967 return true;
7968 }
8644a673 7969 }
83197f37 7970 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 7971 && node == NULL
83197f37
IR
7972 && pattern_stmt
7973 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7974 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7975 {
7976 /* Analyze PATTERN_STMT too. */
73fbfcad 7977 if (dump_enabled_p ())
83197f37 7978 {
78c60e3d
SS
7979 dump_printf_loc (MSG_NOTE, vect_location,
7980 "==> examining pattern statement: ");
7981 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
7982 }
7983
7984 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7985 return false;
7986 }
ebfd146a 7987
1107f3ae 7988 if (is_pattern_stmt_p (stmt_info)
079c527f 7989 && node == NULL
363477c0 7990 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 7991 {
363477c0 7992 gimple_stmt_iterator si;
1107f3ae 7993
363477c0
JJ
7994 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7995 {
355fe088 7996 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
7997 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7998 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7999 {
8000 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8001 if (dump_enabled_p ())
363477c0 8002 {
78c60e3d
SS
8003 dump_printf_loc (MSG_NOTE, vect_location,
8004 "==> examining pattern def statement: ");
8005 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8006 }
1107f3ae 8007
363477c0
JJ
8008 if (!vect_analyze_stmt (pattern_def_stmt,
8009 need_to_vectorize, node))
8010 return false;
8011 }
8012 }
8013 }
1107f3ae 8014
8644a673
IR
8015 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8016 {
8017 case vect_internal_def:
8018 break;
ebfd146a 8019
8644a673 8020 case vect_reduction_def:
7c5222ff 8021 case vect_nested_cycle:
14a61437
RB
8022 gcc_assert (!bb_vinfo
8023 && (relevance == vect_used_in_outer
8024 || relevance == vect_used_in_outer_by_reduction
8025 || relevance == vect_used_by_reduction
b28ead45
AH
8026 || relevance == vect_unused_in_scope
8027 || relevance == vect_used_only_live));
8644a673
IR
8028 break;
8029
8030 case vect_induction_def:
8031 case vect_constant_def:
8032 case vect_external_def:
8033 case vect_unknown_def_type:
8034 default:
8035 gcc_unreachable ();
8036 }
ebfd146a 8037
a70d6342
IR
8038 if (bb_vinfo)
8039 {
8040 gcc_assert (PURE_SLP_STMT (stmt_info));
8041
b690cc0f 8042 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
73fbfcad 8043 if (dump_enabled_p ())
a70d6342 8044 {
78c60e3d
SS
8045 dump_printf_loc (MSG_NOTE, vect_location,
8046 "get vectype for scalar type: ");
8047 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
e645e942 8048 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
8049 }
8050
8051 vectype = get_vectype_for_scalar_type (scalar_type);
8052 if (!vectype)
8053 {
73fbfcad 8054 if (dump_enabled_p ())
a70d6342 8055 {
78c60e3d
SS
8056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8057 "not SLPed: unsupported data-type ");
8058 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8059 scalar_type);
e645e942 8060 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
a70d6342
IR
8061 }
8062 return false;
8063 }
8064
73fbfcad 8065 if (dump_enabled_p ())
a70d6342 8066 {
78c60e3d
SS
8067 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8068 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
e645e942 8069 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
8070 }
8071
8072 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8073 }
8074
8644a673 8075 if (STMT_VINFO_RELEVANT_P (stmt_info))
ebfd146a 8076 {
8644a673 8077 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
0136f8f0
AH
8078 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8079 || (is_gimple_call (stmt)
8080 && gimple_call_lhs (stmt) == NULL_TREE));
8644a673 8081 *need_to_vectorize = true;
ebfd146a
IR
8082 }
8083
b1af7da6
RB
8084 if (PURE_SLP_STMT (stmt_info) && !node)
8085 {
8086 dump_printf_loc (MSG_NOTE, vect_location,
8087 "handled only by SLP analysis\n");
8088 return true;
8089 }
8090
8091 ok = true;
8092 if (!bb_vinfo
8093 && (STMT_VINFO_RELEVANT_P (stmt_info)
8094 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8095 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8096 || vectorizable_conversion (stmt, NULL, NULL, node)
8097 || vectorizable_shift (stmt, NULL, NULL, node)
8098 || vectorizable_operation (stmt, NULL, NULL, node)
8099 || vectorizable_assignment (stmt, NULL, NULL, node)
8100 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8101 || vectorizable_call (stmt, NULL, NULL, node)
8102 || vectorizable_store (stmt, NULL, NULL, node)
8103 || vectorizable_reduction (stmt, NULL, NULL, node)
42fd8198
IE
8104 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8105 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6
RB
8106 else
8107 {
8108 if (bb_vinfo)
8109 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8110 || vectorizable_conversion (stmt, NULL, NULL, node)
8111 || vectorizable_shift (stmt, NULL, NULL, node)
8112 || vectorizable_operation (stmt, NULL, NULL, node)
8113 || vectorizable_assignment (stmt, NULL, NULL, node)
8114 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8115 || vectorizable_call (stmt, NULL, NULL, node)
8116 || vectorizable_store (stmt, NULL, NULL, node)
42fd8198
IE
8117 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8118 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
b1af7da6 8119 }
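  /* Note: every vectorizable_* routine above is invoked with NULL for
     the iterator and output-stmt arguments, which by convention means
     "analyze only"; vect_transform_stmt later re-invokes the same
     routines with real arguments to emit the vector code.  */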
8644a673
IR
8120
8121 if (!ok)
ebfd146a 8122 {
73fbfcad 8123 if (dump_enabled_p ())
8644a673 8124 {
78c60e3d
SS
8125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8126 "not vectorized: relevant stmt not ");
8127 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8128 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8129 }
b8698a0f 8130
ebfd146a
IR
8131 return false;
8132 }
8133
a70d6342
IR
8134 if (bb_vinfo)
8135 return true;
8136
8644a673
IR
8137 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8138 need extra handling, except for vectorizable reductions. */
8139 if (STMT_VINFO_LIVE_P (stmt_info)
8140 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
b28ead45 8141 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
ebfd146a 8142
8644a673 8143 if (!ok)
ebfd146a 8144 {
73fbfcad 8145 if (dump_enabled_p ())
8644a673 8146 {
78c60e3d
SS
8147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8148 "not vectorized: live stmt not ");
8149 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8150 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8644a673 8151 }
b8698a0f 8152
8644a673 8153 return false;
ebfd146a
IR
8154 }
8155
ebfd146a
IR
8156 return true;
8157}
8158
8159
8160/* Function vect_transform_stmt.
8161
8162 Create a vectorized stmt to replace STMT, and insert it at BSI. */
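/* A minimal calling sketch (hypothetical caller code; the actual callers
   are the loop and SLP transform drivers):

     bool grouped_store = false;
     bool is_store = vect_transform_stmt (stmt, &gsi, &grouped_store,
					  NULL, NULL);
     if (is_store)
       ... leave the scalar store in place for now; the caller removes
	   the whole group later, e.g. via vect_remove_stores ...

   The return value flags stores because their scalar counterparts are
   deleted by the caller rather than rewritten in place.  */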
8163
8164bool
355fe088 8165vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
0d0293ac 8166 bool *grouped_store, slp_tree slp_node,
ebfd146a
IR
8167 slp_instance slp_node_instance)
8168{
8169 bool is_store = false;
355fe088 8170 gimple *vec_stmt = NULL;
ebfd146a 8171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 8172 bool done;
ebfd146a 8173
fce57248 8174 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
355fe088 8175 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
225ce44b 8176
ebfd146a
IR
8177 switch (STMT_VINFO_TYPE (stmt_info))
8178 {
8179 case type_demotion_vec_info_type:
ebfd146a 8180 case type_promotion_vec_info_type:
ebfd146a
IR
8181 case type_conversion_vec_info_type:
8182 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8183 gcc_assert (done);
8184 break;
8185
8186 case induc_vec_info_type:
8187 gcc_assert (!slp_node);
8188 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8189 gcc_assert (done);
8190 break;
8191
9dc3f7de
IR
8192 case shift_vec_info_type:
8193 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8194 gcc_assert (done);
8195 break;
8196
ebfd146a
IR
8197 case op_vec_info_type:
8198 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8199 gcc_assert (done);
8200 break;
8201
8202 case assignment_vec_info_type:
8203 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8204 gcc_assert (done);
8205 break;
8206
8207 case load_vec_info_type:
b8698a0f 8208 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
ebfd146a
IR
8209 slp_node_instance);
8210 gcc_assert (done);
8211 break;
8212
8213 case store_vec_info_type:
8214 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8215 gcc_assert (done);
0d0293ac 8216 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
ebfd146a
IR
8217 {
8218 /* In case of interleaving, the whole chain is vectorized when the
ff802fa1 8219 last store in the chain is reached. Store stmts before the last
ebfd146a
IR
8220 one are skipped, and their vec_stmt_info shouldn't be freed
8221 meanwhile. */
0d0293ac 8222 *grouped_store = true;
ebfd146a
IR
8223 if (STMT_VINFO_VEC_STMT (stmt_info))
8224 is_store = true;
8225 }
8226 else
8227 is_store = true;
8228 break;
8229
8230 case condition_vec_info_type:
f7e531cf 8231 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
ebfd146a
IR
8232 gcc_assert (done);
8233 break;
8234
42fd8198
IE
8235 case comparison_vec_info_type:
8236 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8237 gcc_assert (done);
8238 break;
8239
ebfd146a 8240 case call_vec_info_type:
190c2236 8241 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
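      /* vectorizable_call may have replaced the original call in the
	 instruction stream, so refetch STMT from the iterator before
	 inspecting it.  */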
039d9ea1 8242 stmt = gsi_stmt (*gsi);
5ce9450f
JJ
8243 if (is_gimple_call (stmt)
8244 && gimple_call_internal_p (stmt)
8245 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8246 is_store = true;
ebfd146a
IR
8247 break;
8248
0136f8f0
AH
8249 case call_simd_clone_vec_info_type:
8250 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8251 stmt = gsi_stmt (*gsi);
8252 break;
8253
ebfd146a 8254 case reduc_vec_info_type:
b5aeb3bb 8255 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
ebfd146a
IR
8256 gcc_assert (done);
8257 break;
8258
8259 default:
8260 if (!STMT_VINFO_LIVE_P (stmt_info))
8261 {
73fbfcad 8262 if (dump_enabled_p ())
78c60e3d 8263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8264 "stmt not supported.\n");
ebfd146a
IR
8265 gcc_unreachable ();
8266 }
8267 }
8268
225ce44b
RB
8269 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8270 This would break hybrid SLP vectorization. */
8271 if (slp_node)
d90f8440
RB
8272 gcc_assert (!vec_stmt
8273 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
225ce44b 8274
ebfd146a
IR
8275 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8276 is being vectorized, but outside the immediately enclosing loop. */
8277 if (vec_stmt
a70d6342
IR
8278 && STMT_VINFO_LOOP_VINFO (stmt_info)
8279 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8280 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
ebfd146a
IR
8281 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8282 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
b8698a0f 8283 || STMT_VINFO_RELEVANT (stmt_info) ==
a70d6342 8284 vect_used_in_outer_by_reduction))
ebfd146a 8285 {
a70d6342
IR
8286 struct loop *innerloop = LOOP_VINFO_LOOP (
8287 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
ebfd146a
IR
8288 imm_use_iterator imm_iter;
8289 use_operand_p use_p;
8290 tree scalar_dest;
355fe088 8291 gimple *exit_phi;
ebfd146a 8292
73fbfcad 8293 if (dump_enabled_p ())
78c60e3d 8294 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 8295 "Record the vdef for outer-loop vectorization.\n");
ebfd146a
IR
8296
8297 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8298 (to be used when vectorizing outer-loop stmts that use the DEF of
8299 STMT). */
8300 if (gimple_code (stmt) == GIMPLE_PHI)
8301 scalar_dest = PHI_RESULT (stmt);
8302 else
8303 scalar_dest = gimple_assign_lhs (stmt);
8304
8305 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8306 {
8307 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8308 {
8309 exit_phi = USE_STMT (use_p);
8310 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8311 }
8312 }
8313 }
8314
8315 /* Handle stmts whose DEF is used outside the loop-nest that is
8316 being vectorized. */
b28ead45
AH
8317 if (slp_node)
8318 {
8319 gimple *slp_stmt;
8320 int i;
8321 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8322 {
8323 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8324 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8325 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8326 {
8327 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8328 &vec_stmt);
8329 gcc_assert (done);
8330 }
8331 }
8332 }
8333 else if (STMT_VINFO_LIVE_P (stmt_info)
ebfd146a
IR
8334 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8335 {
b28ead45 8336 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
ebfd146a
IR
8337 gcc_assert (done);
8338 }
8339
8340 if (vec_stmt)
83197f37 8341 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
ebfd146a 8342
b8698a0f 8343 return is_store;
ebfd146a
IR
8344}
8345
8346
b8698a0f 8347/* Remove a group of stores (for SLP or interleaving), free their
ebfd146a
IR
8348 stmt_vec_info. */
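/* The chain is walked through GROUP_NEXT_ELEMENT, switching over to the
   related pattern stmt where one exists; this is the cleanup counterpart
   to the is_store result of vect_transform_stmt above.  */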
8349
8350void
355fe088 8351vect_remove_stores (gimple *first_stmt)
ebfd146a 8352{
355fe088
TS
8353 gimple *next = first_stmt;
8354 gimple *tmp;
ebfd146a
IR
8355 gimple_stmt_iterator next_si;
8356
8357 while (next)
8358 {
78048b1c
JJ
8359 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8360
8361 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8362 if (is_pattern_stmt_p (stmt_info))
8363 next = STMT_VINFO_RELATED_STMT (stmt_info);
ebfd146a
IR
8364 /* Free the attached stmt_vec_info and remove the stmt. */
8365 next_si = gsi_for_stmt (next);
3d3f2249 8366 unlink_stmt_vdef (next);
ebfd146a 8367 gsi_remove (&next_si, true);
3d3f2249 8368 release_defs (next);
ebfd146a
IR
8369 free_stmt_vec_info (next);
8370 next = tmp;
8371 }
8372}
8373
8374
8375/* Function new_stmt_vec_info.
8376
8377 Create and initialize a new stmt_vec_info struct for STMT. */
8378
8379stmt_vec_info
310213d4 8380new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
ebfd146a
IR
8381{
8382 stmt_vec_info res;
8383 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8384
8385 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8386 STMT_VINFO_STMT (res) = stmt;
310213d4 8387 res->vinfo = vinfo;
8644a673 8388 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
ebfd146a
IR
8389 STMT_VINFO_LIVE_P (res) = false;
8390 STMT_VINFO_VECTYPE (res) = NULL;
8391 STMT_VINFO_VEC_STMT (res) = NULL;
4b5caab7 8392 STMT_VINFO_VECTORIZABLE (res) = true;
ebfd146a
IR
8393 STMT_VINFO_IN_PATTERN_P (res) = false;
8394 STMT_VINFO_RELATED_STMT (res) = NULL;
363477c0 8395 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
ebfd146a 8396 STMT_VINFO_DATA_REF (res) = NULL;
af29617a 8397 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
ebfd146a
IR
8398
8399 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8400 STMT_VINFO_DR_OFFSET (res) = NULL;
8401 STMT_VINFO_DR_INIT (res) = NULL;
8402 STMT_VINFO_DR_STEP (res) = NULL;
8403 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8404
8405 if (gimple_code (stmt) == GIMPLE_PHI
8406 && is_loop_header_bb_p (gimple_bb (stmt)))
8407 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8408 else
8644a673
IR
8409 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8410
9771b263 8411 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
32e8bb8e 8412 STMT_SLP_TYPE (res) = loop_vect;
78810bd3
RB
8413 STMT_VINFO_NUM_SLP_USES (res) = 0;
8414
e14c1050
IR
8415 GROUP_FIRST_ELEMENT (res) = NULL;
8416 GROUP_NEXT_ELEMENT (res) = NULL;
8417 GROUP_SIZE (res) = 0;
8418 GROUP_STORE_COUNT (res) = 0;
8419 GROUP_GAP (res) = 0;
8420 GROUP_SAME_DR_STMT (res) = NULL;
ebfd146a
IR
8421
8422 return res;
8423}
8424
8425
8426/* Create the vector used to hold stmt_vec_info structs. */
8427
8428void
8429init_stmt_vec_info_vec (void)
8430{
9771b263
DN
8431 gcc_assert (!stmt_vec_info_vec.exists ());
8432 stmt_vec_info_vec.create (50);
ebfd146a
IR
8433}
8434
8435
8436/* Free the vector of stmt_vec_info structs. */
8437
8438void
8439free_stmt_vec_info_vec (void)
8440{
93675444 8441 unsigned int i;
3161455c 8442 stmt_vec_info info;
93675444
JJ
8443 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8444 if (info != NULL)
3161455c 8445 free_stmt_vec_info (STMT_VINFO_STMT (info));
9771b263
DN
8446 gcc_assert (stmt_vec_info_vec.exists ());
8447 stmt_vec_info_vec.release ();
ebfd146a
IR
8448}
8449
8450
8451/* Free stmt vectorization related info. */
8452
8453void
355fe088 8454free_stmt_vec_info (gimple *stmt)
ebfd146a
IR
8455{
8456 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8457
8458 if (!stmt_info)
8459 return;
8460
78048b1c
JJ
8461 /* Check if this statement has a related "pattern stmt"
8462 (introduced by the vectorizer during the pattern recognition
8463 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8464 too. */
8465 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8466 {
8467 stmt_vec_info patt_info
8468 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8469 if (patt_info)
8470 {
363477c0 8471 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
355fe088 8472 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
f0281fde
RB
8473 gimple_set_bb (patt_stmt, NULL);
8474 tree lhs = gimple_get_lhs (patt_stmt);
e6f5c25d 8475 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde 8476 release_ssa_name (lhs);
363477c0
JJ
8477 if (seq)
8478 {
8479 gimple_stmt_iterator si;
8480 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
f0281fde 8481 {
355fe088 8482 gimple *seq_stmt = gsi_stmt (si);
f0281fde 8483 gimple_set_bb (seq_stmt, NULL);
7532abf2 8484 lhs = gimple_get_lhs (seq_stmt);
e6f5c25d 8485 if (lhs && TREE_CODE (lhs) == SSA_NAME)
f0281fde
RB
8486 release_ssa_name (lhs);
8487 free_stmt_vec_info (seq_stmt);
8488 }
363477c0 8489 }
f0281fde 8490 free_stmt_vec_info (patt_stmt);
78048b1c
JJ
8491 }
8492 }
8493
9771b263 8494 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6c9e85fb 8495 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
ebfd146a
IR
8496 set_vinfo_for_stmt (stmt, NULL);
8497 free (stmt_info);
8498}
8499
8500
bb67d9c7 8501/* Function get_vectype_for_scalar_type_and_size.
ebfd146a 8502
bb67d9c7 8503 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
ebfd146a
IR
8504 by the target. */
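/* For example (a sketch, assuming a target with 16-byte vectors):
     get_vectype_for_scalar_type_and_size (int_type, 16)    -> a V4SI-like type
     get_vectype_for_scalar_type_and_size (double_type, 16) -> a V2DF-like type
     get_vectype_for_scalar_type_and_size (int_type, 0)     -> whatever
       targetm.vectorize.preferred_simd_mode (SImode) selects.  */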
8505
bb67d9c7
RG
8506static tree
8507get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
ebfd146a 8508{
ef4bddc2
RS
8509 machine_mode inner_mode = TYPE_MODE (scalar_type);
8510 machine_mode simd_mode;
2f816591 8511 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
ebfd146a
IR
8512 int nunits;
8513 tree vectype;
8514
cc4b5170 8515 if (nbytes == 0)
ebfd146a
IR
8516 return NULL_TREE;
8517
48f2e373
RB
8518 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8519 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8520 return NULL_TREE;
8521
7b7b1813
RG
8522 /* For vector types of elements whose mode precision doesn't
8523 match their type's precision we use an element type of mode
8524 precision. The vectorization routines will have to make sure
48f2e373
RB
8525 they support the proper result truncation/extension.
8526 We also make sure to build vector types with INTEGER_TYPE
8527 component type only. */
6d7971b8 8528 if (INTEGRAL_TYPE_P (scalar_type)
48f2e373
RB
8529 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8530 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7b7b1813
RG
8531 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8532 TYPE_UNSIGNED (scalar_type));
6d7971b8 8533
ccbf5bb4
RG
8534 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8535 When the component mode passes the above test, simply use a type
8536 corresponding to that mode. The theory is that any use that
8537 would cause problems with this will disable vectorization anyway. */
dfc2e2ac 8538 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
e67f39f7 8539 && !INTEGRAL_TYPE_P (scalar_type))
60b95d28
RB
8540 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8541
8542 /* We can't build a vector type of elements with alignment bigger than
8543 their size. */
dfc2e2ac 8544 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
aca43c6c
JJ
8545 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8546 TYPE_UNSIGNED (scalar_type));
ccbf5bb4 8547
dfc2e2ac
RB
8548 /* If we fell back to using the mode, fail if there was
8549 no scalar type for it. */
8550 if (scalar_type == NULL_TREE)
8551 return NULL_TREE;
8552
bb67d9c7
RG
8553 /* If no size was supplied, use the mode the target prefers. Otherwise
8554 look up a vector mode of the specified size. */
8555 if (size == 0)
8556 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8557 else
8558 simd_mode = mode_for_vector (inner_mode, size / nbytes);
cc4b5170
RG
8559 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8560 if (nunits <= 1)
8561 return NULL_TREE;
ebfd146a
IR
8562
8563 vectype = build_vector_type (scalar_type, nunits);
ebfd146a
IR
8564
8565 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8566 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
451dabda 8567 return NULL_TREE;
ebfd146a
IR
8568
8569 return vectype;
8570}
8571
bb67d9c7
RG
8572unsigned int current_vector_size;
8573
8574/* Function get_vectype_for_scalar_type.
8575
8576 Returns the vector type corresponding to SCALAR_TYPE as supported
8577 by the target. */
8578
8579tree
8580get_vectype_for_scalar_type (tree scalar_type)
8581{
8582 tree vectype;
8583 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8584 current_vector_size);
8585 if (vectype
8586 && current_vector_size == 0)
8587 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8588 return vectype;
8589}
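/* Note that the first successful lookup latches current_vector_size;
   e.g. on a hypothetical target with 32-byte vectors:
     get_vectype_for_scalar_type (int_type)   -> V8SI, size latched to 32
     get_vectype_for_scalar_type (short_type) -> V16HI (again 32 bytes)
   so every vector type chosen for one region has the same size.  */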
8590
42fd8198
IE
8591/* Function get_mask_type_for_scalar_type.
8592
8593 Returns the mask type corresponding to a result of comparison
8594 of vectors of specified SCALAR_TYPE as supported by target. */
8595
8596tree
8597get_mask_type_for_scalar_type (tree scalar_type)
8598{
8599 tree vectype = get_vectype_for_scalar_type (scalar_type);
8600
8601 if (!vectype)
8602 return NULL;
8603
8604 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8605 current_vector_size);
8606}
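/* E.g. with current_vector_size == 16 an int comparison operates on
   4-element vectors, so this returns a 4-element boolean vector type;
   on targets with scalar masks (AVX-512-style) the boolean vector may
   instead map to an integer mask mode of matching element count.  */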
8607
b690cc0f
RG
8608/* Function get_same_sized_vectype
8609
8610 Returns a vector type corresponding to SCALAR_TYPE of size
8611 VECTOR_TYPE if supported by the target. */
8612
8613tree
bb67d9c7 8614get_same_sized_vectype (tree scalar_type, tree vector_type)
b690cc0f 8615{
9f47c7e5
IE
8616 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8617 return build_same_sized_truth_vector_type (vector_type);
8618
bb67d9c7
RG
8619 return get_vectype_for_scalar_type_and_size
8620 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b690cc0f
RG
8621}
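/* E.g. (a sketch): given VECTOR_TYPE V2DF (16 bytes), SCALAR_TYPE int
   yields a 4-element integer vector of the same 16-byte size, while a
   BOOLEAN_TYPE scalar yields the truth vector type matching
   VECTOR_TYPE's layout.  */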
8622
ebfd146a
IR
8623/* Function vect_is_simple_use.
8624
8625 Input:
81c40241
RB
8626 VINFO - the vect info of the loop or basic block that is being vectorized.
8627 OPERAND - operand in the loop or bb.
8628 Output:
8629 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8630 DT - the type of definition
ebfd146a
IR
8631
8632 Returns whether a stmt with OPERAND can be vectorized.
b8698a0f 8633 For loops, supportable operands are constants, loop invariants, and operands
ff802fa1 8634 that are defined by the current iteration of the loop. Unsupportable
b8698a0f 8635 operands are those that are defined by a previous iteration of the loop (as
a70d6342
IR
8636 is the case in reduction/induction computations).
8637 For basic blocks, supportable operands are constants and bb invariants.
8638 For now, operands defined outside the basic block are not supported. */
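/* Example classifications (a sketch):

     int n = ...;                     // defined before the loop
     for (i = 0; i < N; i++)
       { x = a[i]; y = x + n; b[i] = y + 1; }

   For the use "x + n": X is vect_internal_def (defined by a stmt inside
   the region), N is vect_external_def, and the constant 1 in "y + 1" is
   vect_constant_def.  */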
ebfd146a
IR
8639
8640bool
81c40241
RB
8641vect_is_simple_use (tree operand, vec_info *vinfo,
8642 gimple **def_stmt, enum vect_def_type *dt)
b8698a0f 8643{
ebfd146a 8644 *def_stmt = NULL;
3fc356dc 8645 *dt = vect_unknown_def_type;
b8698a0f 8646
73fbfcad 8647 if (dump_enabled_p ())
ebfd146a 8648 {
78c60e3d
SS
8649 dump_printf_loc (MSG_NOTE, vect_location,
8650 "vect_is_simple_use: operand ");
8651 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
e645e942 8652 dump_printf (MSG_NOTE, "\n");
ebfd146a 8653 }
b8698a0f 8654
b758f602 8655 if (CONSTANT_CLASS_P (operand))
ebfd146a
IR
8656 {
8657 *dt = vect_constant_def;
8658 return true;
8659 }
b8698a0f 8660
ebfd146a
IR
8661 if (is_gimple_min_invariant (operand))
8662 {
8644a673 8663 *dt = vect_external_def;
ebfd146a
IR
8664 return true;
8665 }
8666
ebfd146a
IR
8667 if (TREE_CODE (operand) != SSA_NAME)
8668 {
73fbfcad 8669 if (dump_enabled_p ())
af29617a
AH
8670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8671 "not ssa-name.\n");
ebfd146a
IR
8672 return false;
8673 }
b8698a0f 8674
3fc356dc 8675 if (SSA_NAME_IS_DEFAULT_DEF (operand))
ebfd146a 8676 {
3fc356dc
RB
8677 *dt = vect_external_def;
8678 return true;
ebfd146a
IR
8679 }
8680
3fc356dc 8681 *def_stmt = SSA_NAME_DEF_STMT (operand);
73fbfcad 8682 if (dump_enabled_p ())
ebfd146a 8683 {
78c60e3d
SS
8684 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8685 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
ebfd146a
IR
8686 }
8687
61d371eb 8688 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8644a673 8689 *dt = vect_external_def;
ebfd146a
IR
8690 else
8691 {
3fc356dc 8692 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
603cca93 8693 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
ebfd146a
IR
8694 }
8695
2e8ab70c
RB
8696 if (dump_enabled_p ())
8697 {
8698 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8699 switch (*dt)
8700 {
8701 case vect_uninitialized_def:
8702 dump_printf (MSG_NOTE, "uninitialized\n");
8703 break;
8704 case vect_constant_def:
8705 dump_printf (MSG_NOTE, "constant\n");
8706 break;
8707 case vect_external_def:
8708 dump_printf (MSG_NOTE, "external\n");
8709 break;
8710 case vect_internal_def:
8711 dump_printf (MSG_NOTE, "internal\n");
8712 break;
8713 case vect_induction_def:
8714 dump_printf (MSG_NOTE, "induction\n");
8715 break;
8716 case vect_reduction_def:
8717 dump_printf (MSG_NOTE, "reduction\n");
8718 break;
8719 case vect_double_reduction_def:
8720 dump_printf (MSG_NOTE, "double reduction\n");
8721 break;
8722 case vect_nested_cycle:
8723 dump_printf (MSG_NOTE, "nested cycle\n");
8724 break;
8725 case vect_unknown_def_type:
8726 dump_printf (MSG_NOTE, "unknown\n");
8727 break;
8728 }
8729 }
8730
81c40241 8731 if (*dt == vect_unknown_def_type)
ebfd146a 8732 {
73fbfcad 8733 if (dump_enabled_p ())
78c60e3d 8734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8735 "Unsupported pattern.\n");
ebfd146a
IR
8736 return false;
8737 }
8738
ebfd146a
IR
8739 switch (gimple_code (*def_stmt))
8740 {
8741 case GIMPLE_PHI:
ebfd146a 8742 case GIMPLE_ASSIGN:
ebfd146a 8743 case GIMPLE_CALL:
81c40241 8744 break;
ebfd146a 8745 default:
73fbfcad 8746 if (dump_enabled_p ())
78c60e3d 8747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 8748 "unsupported defining stmt:\n");
ebfd146a
IR
8749 return false;
8750 }
8751
8752 return true;
8753}
8754
81c40241 8755/* Function vect_is_simple_use.
b690cc0f 8756
81c40241 8757 Same as vect_is_simple_use but also determines the vector operand
b690cc0f
RG
8758 type of OPERAND and stores it to *VECTYPE. If the definition of
8759 OPERAND is vect_uninitialized_def, vect_constant_def or
8760 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8761 is responsible to compute the best suited vector type for the
8762 scalar operand. */
8763
8764bool
81c40241
RB
8765vect_is_simple_use (tree operand, vec_info *vinfo,
8766 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
b690cc0f 8767{
81c40241 8768 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
b690cc0f
RG
8769 return false;
8770
8771 /* Now get a vector type if the def is internal, otherwise supply
8772 NULL_TREE and leave it up to the caller to figure out a proper
8773 type for the use stmt. */
8774 if (*dt == vect_internal_def
8775 || *dt == vect_induction_def
8776 || *dt == vect_reduction_def
8777 || *dt == vect_double_reduction_def
8778 || *dt == vect_nested_cycle)
8779 {
8780 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
83197f37
IR
8781
8782 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8783 && !STMT_VINFO_RELEVANT (stmt_info)
8784 && !STMT_VINFO_LIVE_P (stmt_info))
b690cc0f 8785 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
83197f37 8786
b690cc0f
RG
8787 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8788 gcc_assert (*vectype != NULL_TREE);
8789 }
8790 else if (*dt == vect_uninitialized_def
8791 || *dt == vect_constant_def
8792 || *dt == vect_external_def)
8793 *vectype = NULL_TREE;
8794 else
8795 gcc_unreachable ();
8796
8797 return true;
8798}
8799
ebfd146a
IR
8800
8801/* Function supportable_widening_operation
8802
b8698a0f
L
8803 Check whether an operation represented by the code CODE is a
8804 widening operation that is supported by the target platform in
b690cc0f
RG
8805 vector form (i.e., when operating on arguments of type VECTYPE_IN
8806 producing a result of type VECTYPE_OUT).
b8698a0f 8807
ebfd146a
IR
8808 Widening operations we currently support are NOP (CONVERT), FLOAT,
8809 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if these operations are supported
8810 by the target platform either directly (via vector tree-codes), or via
8811 target builtins.
8812
8813 Output:
b8698a0f
L
8814 - CODE1 and CODE2 are codes of vector operations to be used when
8815 vectorizing the operation, if available.
ebfd146a
IR
8816 - MULTI_STEP_CVT determines the number of required intermediate steps in
8817 case of multi-step conversion (like char->short->int - in that case
8818 MULTI_STEP_CVT will be 1).
b8698a0f
L
8819 - INTERM_TYPES contains the intermediate type required to perform the
8820 widening operation (short in the above example). */
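/* Worked example (a sketch): widening char -> int on a target with
   16-byte vectors.  VECTYPE_IN is V16QI and VECTYPE_OUT is V4SI.  No
   single optab widens QImode elements directly to SImode, so the
   multi-step path applies:
     V16QI --VEC_UNPACK_LO/HI--> 2 x V8HI --VEC_UNPACK_LO/HI--> 4 x V4SI
   On success *CODE1/*CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   *MULTI_STEP_CVT is 1 and *INTERM_TYPES holds the V8HI vector type.  */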
ebfd146a
IR
8821
8822bool
355fe088 8823supportable_widening_operation (enum tree_code code, gimple *stmt,
b690cc0f 8824 tree vectype_out, tree vectype_in,
ebfd146a
IR
8825 enum tree_code *code1, enum tree_code *code2,
8826 int *multi_step_cvt,
9771b263 8827 vec<tree> *interm_types)
ebfd146a
IR
8828{
8829 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8830 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4ef69dfc 8831 struct loop *vect_loop = NULL;
ef4bddc2 8832 machine_mode vec_mode;
81f40b79 8833 enum insn_code icode1, icode2;
ebfd146a 8834 optab optab1, optab2;
b690cc0f
RG
8835 tree vectype = vectype_in;
8836 tree wide_vectype = vectype_out;
ebfd146a 8837 enum tree_code c1, c2;
4a00c761
JJ
8838 int i;
8839 tree prev_type, intermediate_type;
ef4bddc2 8840 machine_mode intermediate_mode, prev_mode;
4a00c761 8841 optab optab3, optab4;
ebfd146a 8842
4a00c761 8843 *multi_step_cvt = 0;
4ef69dfc
IR
8844 if (loop_info)
8845 vect_loop = LOOP_VINFO_LOOP (loop_info);
8846
ebfd146a
IR
8847 switch (code)
8848 {
8849 case WIDEN_MULT_EXPR:
6ae6116f
RH
8850 /* The result of a vectorized widening operation usually requires
8851 two vectors (because the widened results do not fit into one vector).
8852 The generated vector results would normally be expected to be
8853 generated in the same order as in the original scalar computation,
8854 i.e. if 8 results are generated in each vector iteration, they are
8855 to be organized as follows:
8856 vect1: [res1,res2,res3,res4],
8857 vect2: [res5,res6,res7,res8].
8858
8859 However, in the special case that the result of the widening
8860 operation is used in a reduction computation only, the order doesn't
8861 matter (because when vectorizing a reduction we change the order of
8862 the computation). Some targets can take advantage of this and
8863 generate more efficient code. For example, targets like Altivec,
8864 that support widen_mult using a sequence of {mult_even,mult_odd}
8865 generate the following vectors:
8866 vect1: [res1,res3,res5,res7],
8867 vect2: [res2,res4,res6,res8].
8868
8869 When vectorizing outer-loops, we execute the inner-loop sequentially
8870 (each vectorized inner-loop iteration contributes to VF outer-loop
8871 iterations in parallel). We therefore don't allow changing the
8872 order of the computation in the inner-loop during outer-loop
8873 vectorization. */
8874 /* TODO: Another case in which order doesn't *really* matter is when we
8875 widen and then contract again, e.g. (short)((int)x * y >> 8).
8876 Normally, pack_trunc performs an even/odd permute, whereas the
8877 repack from an even/odd expansion would be an interleave, which
8878 would be significantly simpler for e.g. AVX2. */
8879 /* In any case, in order to avoid duplicating the code below, recurse
8880 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8881 are properly set up for the caller. If we fail, we'll continue with
8882 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8883 if (vect_loop
8884 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8885 && !nested_in_vect_loop_p (vect_loop, stmt)
8886 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8887 stmt, vectype_out, vectype_in,
a86ec597
RH
8888 code1, code2, multi_step_cvt,
8889 interm_types))
ebc047a2
CH
8890 {
8891 /* Elements in a vector with vect_used_by_reduction property cannot
8892 be reordered if the use chain with this property does not have the
8893 same operation. One such example is s += a * b, where elements
8894 in a and b cannot be reordered. Here we check if the vector defined
8895 by STMT is only directly used in the reduction statement. */
8896 tree lhs = gimple_assign_lhs (stmt);
8897 use_operand_p dummy;
355fe088 8898 gimple *use_stmt;
ebc047a2
CH
8899 stmt_vec_info use_stmt_info = NULL;
8900 if (single_imm_use (lhs, &dummy, &use_stmt)
8901 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8902 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8903 return true;
8904 }
4a00c761
JJ
8905 c1 = VEC_WIDEN_MULT_LO_EXPR;
8906 c2 = VEC_WIDEN_MULT_HI_EXPR;
ebfd146a
IR
8907 break;
8908
81c40241
RB
8909 case DOT_PROD_EXPR:
8910 c1 = DOT_PROD_EXPR;
8911 c2 = DOT_PROD_EXPR;
8912 break;
8913
8914 case SAD_EXPR:
8915 c1 = SAD_EXPR;
8916 c2 = SAD_EXPR;
8917 break;
8918
6ae6116f
RH
8919 case VEC_WIDEN_MULT_EVEN_EXPR:
8920 /* Support the recursion induced just above. */
8921 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8922 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8923 break;
8924
36ba4aae 8925 case WIDEN_LSHIFT_EXPR:
4a00c761
JJ
8926 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8927 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
36ba4aae
IR
8928 break;
8929
ebfd146a 8930 CASE_CONVERT:
4a00c761
JJ
8931 c1 = VEC_UNPACK_LO_EXPR;
8932 c2 = VEC_UNPACK_HI_EXPR;
ebfd146a
IR
8933 break;
8934
8935 case FLOAT_EXPR:
4a00c761
JJ
8936 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8937 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
ebfd146a
IR
8938 break;
8939
8940 case FIX_TRUNC_EXPR:
8941 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8942 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8943 computing the operation. */
8944 return false;
8945
8946 default:
8947 gcc_unreachable ();
8948 }
8949
6ae6116f 8950 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6b4db501 8951 std::swap (c1, c2);
4a00c761 8952
ebfd146a
IR
8953 if (code == FIX_TRUNC_EXPR)
8954 {
8955 /* The signedness is determined from output operand. */
b690cc0f
RG
8956 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8957 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
ebfd146a
IR
8958 }
8959 else
8960 {
8961 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8962 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8963 }
8964
8965 if (!optab1 || !optab2)
8966 return false;
8967
8968 vec_mode = TYPE_MODE (vectype);
947131ba
RS
8969 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8970 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
8971 return false;
8972
4a00c761
JJ
8973 *code1 = c1;
8974 *code2 = c2;
8975
8976 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8977 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
8978 /* For scalar masks we may have different boolean
8979 vector types having the same QImode. Thus we
8980 add an additional check for the number of elements. */
8981 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
8982 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
8983 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
4a00c761 8984
b8698a0f 8985 /* Check if it's a multi-step conversion that can be done using intermediate
ebfd146a 8986 types. */
ebfd146a 8987
4a00c761
JJ
8988 prev_type = vectype;
8989 prev_mode = vec_mode;
b8698a0f 8990
4a00c761
JJ
8991 if (!CONVERT_EXPR_CODE_P (code))
8992 return false;
b8698a0f 8993
4a00c761
JJ
8994 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8995 intermediate steps in the promotion sequence. We try
8996 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8997 not. */
9771b263 8998 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
8999 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9000 {
9001 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9002 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9003 {
9004 intermediate_type
9005 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9006 current_vector_size);
9007 if (intermediate_mode != TYPE_MODE (intermediate_type))
9008 return false;
9009 }
9010 else
9011 intermediate_type
9012 = lang_hooks.types.type_for_mode (intermediate_mode,
9013 TYPE_UNSIGNED (prev_type));
9014
4a00c761
JJ
9015 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9016 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9017
9018 if (!optab3 || !optab4
9019 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9020 || insn_data[icode1].operand[0].mode != intermediate_mode
9021 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9022 || insn_data[icode2].operand[0].mode != intermediate_mode
9023 || ((icode1 = optab_handler (optab3, intermediate_mode))
9024 == CODE_FOR_nothing)
9025 || ((icode2 = optab_handler (optab4, intermediate_mode))
9026 == CODE_FOR_nothing))
9027 break;
ebfd146a 9028
9771b263 9029 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9030 (*multi_step_cvt)++;
9031
9032 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9033 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
5e8d6dff
IE
9034 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9035 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9036 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
4a00c761
JJ
9037
9038 prev_type = intermediate_type;
9039 prev_mode = intermediate_mode;
ebfd146a
IR
9040 }
9041
9771b263 9042 interm_types->release ();
4a00c761 9043 return false;
ebfd146a
IR
9044}
9045
9046
9047/* Function supportable_narrowing_operation
9048
b8698a0f
L
9049 Check whether an operation represented by the code CODE is a
9050 narrowing operation that is supported by the target platform in
b690cc0f
RG
9051 vector form (i.e., when operating on arguments of type VECTYPE_IN
9052 and producing a result of type VECTYPE_OUT).
b8698a0f 9053
ebfd146a 9054 Narrowing operations we currently support are NOP (CONVERT) and
ff802fa1 9055 FIX_TRUNC. This function checks if these operations are supported by
ebfd146a
IR
9056 the target platform directly via vector tree-codes.
9057
9058 Output:
b8698a0f
L
9059 - CODE1 is the code of a vector operation to be used when
9060 vectorizing the operation, if available.
ebfd146a
IR
9061 - MULTI_STEP_CVT determines the number of required intermediate steps in
9062 case of multi-step conversion (like int->short->char - in that case
9063 MULTI_STEP_CVT will be 1).
9064 - INTERM_TYPES contains the intermediate type required to perform the
b8698a0f 9065 narrowing operation (short in the above example). */
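/* Worked example (a sketch): narrowing int -> char with 16-byte
   vectors.  VECTYPE_IN is V4SI and VECTYPE_OUT is V16QI.  Each step is
   a VEC_PACK_TRUNC_EXPR combining two vectors into one narrower one:
     2 x V4SI --VEC_PACK_TRUNC--> V8HI, 2 x V8HI --VEC_PACK_TRUNC--> V16QI
   so *CODE1 is VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT is 1 and
   *INTERM_TYPES holds the V8HI vector type.  */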
ebfd146a
IR
9066
9067bool
9068supportable_narrowing_operation (enum tree_code code,
b690cc0f 9069 tree vectype_out, tree vectype_in,
ebfd146a 9070 enum tree_code *code1, int *multi_step_cvt,
9771b263 9071 vec<tree> *interm_types)
ebfd146a 9072{
ef4bddc2 9073 machine_mode vec_mode;
ebfd146a
IR
9074 enum insn_code icode1;
9075 optab optab1, interm_optab;
b690cc0f
RG
9076 tree vectype = vectype_in;
9077 tree narrow_vectype = vectype_out;
ebfd146a 9078 enum tree_code c1;
3ae0661a 9079 tree intermediate_type, prev_type;
ef4bddc2 9080 machine_mode intermediate_mode, prev_mode;
ebfd146a 9081 int i;
4a00c761 9082 bool uns;
ebfd146a 9083
4a00c761 9084 *multi_step_cvt = 0;
ebfd146a
IR
9085 switch (code)
9086 {
9087 CASE_CONVERT:
9088 c1 = VEC_PACK_TRUNC_EXPR;
9089 break;
9090
9091 case FIX_TRUNC_EXPR:
9092 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9093 break;
9094
9095 case FLOAT_EXPR:
9096 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9097 tree code and optabs used for computing the operation. */
9098 return false;
9099
9100 default:
9101 gcc_unreachable ();
9102 }
9103
9104 if (code == FIX_TRUNC_EXPR)
9105 /* The signedness is determined from output operand. */
b690cc0f 9106 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
ebfd146a
IR
9107 else
9108 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9109
9110 if (!optab1)
9111 return false;
9112
9113 vec_mode = TYPE_MODE (vectype);
947131ba 9114 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
ebfd146a
IR
9115 return false;
9116
4a00c761
JJ
9117 *code1 = c1;
9118
9119 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
9120 /* For scalar masks we may have different boolean
9121 vector types having the same QImode. Thus we
9122 add an additional check for the number of elements. */
9123 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9124 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9125 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761 9126
ebfd146a
IR
9127 /* Check if it's a multi-step conversion that can be done using intermediate
9128 types. */
4a00c761 9129 prev_mode = vec_mode;
3ae0661a 9130 prev_type = vectype;
4a00c761
JJ
9131 if (code == FIX_TRUNC_EXPR)
9132 uns = TYPE_UNSIGNED (vectype_out);
9133 else
9134 uns = TYPE_UNSIGNED (vectype);
9135
9136 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9137 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9138 costly than signed. */
9139 if (code == FIX_TRUNC_EXPR && uns)
9140 {
9141 enum insn_code icode2;
9142
9143 intermediate_type
9144 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9145 interm_optab
9146 = optab_for_tree_code (c1, intermediate_type, optab_default);
2225b9f2 9147 if (interm_optab != unknown_optab
4a00c761
JJ
9148 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9149 && insn_data[icode1].operand[0].mode
9150 == insn_data[icode2].operand[0].mode)
9151 {
9152 uns = false;
9153 optab1 = interm_optab;
9154 icode1 = icode2;
9155 }
9156 }
ebfd146a 9157
4a00c761
JJ
9158 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9159 intermediate steps in the narrowing sequence. We try
9160 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9771b263 9161 interm_types->create (MAX_INTERM_CVT_STEPS);
4a00c761
JJ
9162 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9163 {
9164 intermediate_mode = insn_data[icode1].operand[0].mode;
3ae0661a
IE
9165 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9166 {
9167 intermediate_type
9168 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9169 current_vector_size);
9170 if (intermediate_mode != TYPE_MODE (intermediate_type))
9171 return false;
9172 }
9173 else
9174 intermediate_type
9175 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
4a00c761
JJ
9176 interm_optab
9177 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9178 optab_default);
9179 if (!interm_optab
9180 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9181 || insn_data[icode1].operand[0].mode != intermediate_mode
9182 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9183 == CODE_FOR_nothing))
9184 break;
9185
9771b263 9186 interm_types->quick_push (intermediate_type);
4a00c761
JJ
9187 (*multi_step_cvt)++;
9188
9189 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5e8d6dff
IE
9190 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9191 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9192 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
4a00c761
JJ
9193
9194 prev_mode = intermediate_mode;
3ae0661a 9195 prev_type = intermediate_type;
4a00c761 9196 optab1 = interm_optab;
ebfd146a
IR
9197 }
9198
9771b263 9199 interm_types->release ();
4a00c761 9200 return false;
ebfd146a 9201}