/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      stmt_info_for_cost si = { count, kind,
                                stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                                misalign };
      body_cost_vec->safe_push (si);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    return add_stmt_cost (stmt_info->vinfo->target_cost_data,
                          count, kind, stmt_info, misalign, where);
}
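
/* An illustrative usage sketch (not part of the original source): during
   analysis, callers typically batch costs into a cost vector and replay
   them into the target model later, e.g.

     stmt_vector_for_cost cost_vec;
     cost_vec.create (8);
     unsigned est = record_stmt_cost (&cost_vec, 1, vector_stmt,
                                      stmt_info, 0, vect_body);

   whereas passing a NULL vector hands the cost straight to add_stmt_cost
   on the target's cost data.  */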

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
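
/* A hedged illustration (not from the original source) of the gimple these
   two helpers emit; the SSA names are invented for the example:

     vect_x.7 = vect_array[2];     <-- read_vector_array, N == 2
     vect_array[3] = vect_y.8;     <-- write_vector_array, N == 3

   The assumption (the callers sit outside this excerpt) is that the
   load-lanes/store-lanes code paths use these to move individual vectors
   in and out of the array operand of the lanes operation.  */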

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple *pattern_stmt;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark relevant %d, live %d: ", relevant, live_p);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple *use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is a non-pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.\n");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt)
        && !gimple_clobber_p (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
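
/* An illustrative example (not part of the original source): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   the store "a[i] = ..." alters memory (has a vdef) and is therefore
   marked vect_used_in_scope, while a def whose only use outside the loop
   sits in a loop-closed exit phi makes its defining stmt live.  */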


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_STORE:
            operand = gimple_call_arg (stmt, 3);
            if (operand == use)
              return true;
            /* FALLTHRU */
          case IFN_MASK_LOAD:
            operand = gimple_call_arg (stmt, 2);
            if (operand == use)
              return true;
            break;
          default:
            break;
          }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
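
/* A hedged example (the SSA names are invented for illustration): given
   the store "a[i_3] = x_2", the use x_2 is the stored value, so the
   function returns true for it; the use i_3 only feeds the address
   computation of the ARRAY_REF, so the function returns false and the
   stmt defining i_3 need not itself be vectorized.  */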


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple *> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple *stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple *phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple *, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.\n");
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.\n");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.\n");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    return false;
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (TREE_CODE (op) == SSA_NAME
                      && !process_use (stmt, op, loop_vinfo, live_p, relevant,
                                       &worklist, false))
                    return false;
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    return false;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              return false;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            return false;
        }
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
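
/* A hedged worked example (not in the original source): vectorizing
   "x = y + 3" with ncopies == 2 and one constant operand records one
   vector_stmt in the prologue (to materialize the constant's vector)
   and two vector_stmt entries in the loop body, so with a unit cost of
   1 per vector_stmt this reports inside_cost = 2, prologue_cost = 1.  */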


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
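
/* A hedged worked example (not in the original source), assuming a unit
   cost of 1 per vec_promote_demote stmt: for a two-step promotion
   (PWR == 1) the loop above charges vect_pow2 (1) + vect_pow2 (2)
   = 2 + 4 = 6 stmts, while the matching two-step demotion charges
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3, reflecting that widening
   doubles the number of vectors produced at every step.  */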

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple *first_stmt;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1
      && !STMT_VINFO_STRIDED_P (stmt_info))
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }
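  /* A hedged worked example (not in the original source): for a group of
     four interleaved stores (group_size == 4) with ncopies == 1, the
     formula above charges 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts,
     matching the log2-deep interleave network needed to shuffle four
     vectors into store order.  */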

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (STMT_VINFO_STRIDED_P (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* N scalar stores plus extracting the elements.  */
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    inside_cost += record_stmt_cost (body_cost_vec,
                                     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                     vec_to_scalar, stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
1046
ebfd146a
IR
1047/* Function vect_model_load_cost
1048
0d0293ac
MM
1049 Models cost for loads. In the case of grouped accesses, the last access
1050 has the overhead of the grouped access attributed to it. Since unaligned
b8698a0f 1051 accesses are supported for loads, we also account for the costs of the
ebfd146a
IR
1052 access scheme chosen. */
1053
1054void
92345349
BS
1055vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1056 bool load_lanes_p, slp_tree slp_node,
1057 stmt_vector_for_cost *prologue_cost_vec,
1058 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
1059{
1060 int group_size;
355fe088 1061 gimple *first_stmt;
ebfd146a 1062 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
92345349 1063 unsigned int inside_cost = 0, prologue_cost = 0;
ebfd146a 1064
0d0293ac 1065 /* Grouped accesses? */
e14c1050 1066 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
0d0293ac 1067 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
ebfd146a 1068 {
0d0293ac 1069 group_size = vect_cost_group_size (stmt_info);
ebfd146a
IR
1070 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1071 }
0d0293ac 1072 /* Not a grouped access. */
ebfd146a
IR
1073 else
1074 {
1075 group_size = 1;
1076 first_dr = dr;
1077 }
1078
272c6793 1079 /* We assume that the cost of a single load-lanes instruction is
0d0293ac 1080 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
272c6793
RS
1081 access is instead being provided by a load-and-permute operation,
1082 include the cost of the permutes. */
7b5fc413 1083 if (!load_lanes_p && group_size > 1
f2e2a985 1084 && !STMT_VINFO_STRIDED_P (stmt_info))
ebfd146a 1085 {
2c23db6d
ES
1086 /* Uses an even and odd extract operations or shuffle operations
1087 for each needed permute. */
1088 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1089 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1090 stmt_info, 0, vect_body);
ebfd146a 1091
73fbfcad 1092 if (dump_enabled_p ())
e645e942
TJ
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: strided group_size = %d .\n",
78c60e3d 1095 group_size);
ebfd146a
IR
1096 }
1097
1098 /* The loads themselves. */
f2e2a985 1099 if (STMT_VINFO_STRIDED_P (stmt_info)
7b5fc413 1100 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
a82960aa 1101 {
a21892ad
BS
1102 /* N scalar loads plus gathering them into a vector. */
1103 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
92345349 1104 inside_cost += record_stmt_cost (body_cost_vec,
c3e7ee41 1105 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
92345349 1106 scalar_load, stmt_info, 0, vect_body);
a82960aa
RG
1107 }
1108 else
1109 vect_get_load_cost (first_dr, ncopies,
1110 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1111 || group_size > 1 || slp_node),
92345349
BS
1112 &inside_cost, &prologue_cost,
1113 prologue_cost_vec, body_cost_vec, true);
f2e2a985 1114 if (STMT_VINFO_STRIDED_P (stmt_info))
7b5fc413
RB
1115 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1116 stmt_info, 0, vect_body);
720f5239 1117
73fbfcad 1118 if (dump_enabled_p ())
78c60e3d
SS
1119 dump_printf_loc (MSG_NOTE, vect_location,
1120 "vect_model_load_cost: inside_cost = %d, "
e645e942 1121 "prologue_cost = %d .\n", inside_cost, prologue_cost);
720f5239
IR
1122}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple *stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   a vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (stmt, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else if (CONSTANT_CLASS_P (val))
            val = fold_convert (TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type));
              init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  return new_temp;
}
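
/* A hedged illustration (not from the original source) of the gimple this
   emits when splatting a scalar VAL of a different but compatible type
   into a V4SI vector; the SSA names are invented for the example:

     val.conv_1 = (int) val_0;                             <-- NOP_EXPR
     vect_cst_.2 = {val.conv_1, val.conv_1, val.conv_1, val.conv_1};

   Both stmts land in the loop preheader when GSI is NULL.  */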


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

tree
vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
  tree vec_oprnd;
  gimple *vec_stmt;
  gimple *def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt =  ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* operand is a constant or a loop invariant.  */
    case vect_constant_def:
    case vect_external_def:
      {
        if (vectype)
          vector_type = vectype;
        else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
                 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
          vector_type = build_same_sized_truth_vector_type (stmt_vectype);
        else
          vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

        gcc_assert (vector_type);
        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {
        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      /* Code should use get_initial_def_for_reduction.  */
      gcc_unreachable ();

    /* operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0      VS1.1
                        VS1.1: vx.1 = memref1      VS1.2
                        VS1.2: vx.2 = memref2      VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ...  VSnew.1
                        VSnew.1: vz1 = vx.1 + ...  VSnew.2
                        VSnew.2: vz2 = vx.2 + ...  VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple *vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def )
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of a reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      auto_vec<tree> ops (nops);
      auto_vec<vec<tree> > vec_defs (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
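
/* Illustrative usage sketch (not part of the original source): transform
   loops combine the two helpers above, fetching full defs for the first
   copy and chained defs afterwards:

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                              slp_node, -1);
         else
           vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
         // ... generate the j-th vector stmt from vec_oprnds0/vec_oprnds1 ...
       }  */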


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  vec_info *vinfo = stmt_info->vinfo;

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));

  /* While EH edges will generally prevent vectorization, stmt might
     e.g. be in a must-not-throw region.  Ensure newly created stmts
     that could throw are part of the same region.  */
  int lp_nr = lookup_stmt_eh_lp (stmt);
  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
    add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
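
/* Illustrative sketch (not part of the original source): when a vectorized
   store is inserted before an existing scalar store, the virtual SSA chain
   is rewired in place rather than handed to the renamer:

     before:                        after:
       # .MEM_3 = VDEF <.MEM_2>       # .MEM_5 = VDEF <.MEM_2>
       *p_1 = x_4;                    vectorized store
                                      # .MEM_3 = VDEF <.MEM_5>
                                      *p_1 = x_4;

   .MEM_5 is the virtual name created by copy_ssa_name above; the SSA
   names are assumptions for the example.  */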

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}
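
/* Illustrative sketch (not part of the original source): for a call to
   __builtin_sqrt with V2DF input and output, the lookup above resolves
   roughly as

     combined_fn cfn = gimple_call_combined_fn (stmt);  == CFN_BUILT_IN_SQRT
     internal_fn ifn = associated_internal_fn (fndecl); == IFN_SQRT

   and IFN_SQRT, being a direct vectorizable internal function, is
   returned iff direct_internal_fn_supported_p reports that the target
   implements it for V2DFmode.  The V2DF types are an assumption for
   the example.  */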

static tree permute_vec_elements (tree, tree, tree, gimple *,
                                  gimple_stmt_iterator *);


/* Function vectorizable_mask_load_store.

   Check if STMT performs a conditional load or store that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree rhs_vectype = NULL_TREE;
  tree mask_vectype;
  tree elem_type;
  gimple *new_stmt;
  tree dummy;
  tree dataref_ptr = NULL_TREE;
  gimple *ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  bool inv_p;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  bool is_store;
  tree mask;
  gimple *def_stmt;
  enum vect_def_type dt;

  if (slp_node != NULL)
    return false;

  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);

  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
  mask = gimple_call_arg (stmt, 2);

  if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
    return false;

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_P (stmt_info))
    return false;

  if (TREE_CODE (mask) != SSA_NAME)
    return false;

  if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
    return false;

  if (!mask_vectype)
    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));

  if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
    return false;

  if (is_store)
    {
      tree rhs = gimple_call_arg (stmt, 3);
      if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
        return false;
    }

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }

      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree masktype
        = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "masked gather with integer mask not supported.");
          return false;
        }
    }
  else if (tree_int_cst_compare (nested_in_vect_loop
                                 ? STMT_VINFO_DR_STEP (stmt_info)
                                 : DR_STEP (dr), size_zero_node) <= 0)
    return false;
  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
           || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
                                          TYPE_MODE (mask_vectype),
                                          !is_store)
           || (rhs_vectype
               && !useless_type_conversion_p (vectype, rhs_vectype)))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (is_store)
        vect_model_store_cost (stmt_info, ncopies, false, dt,
                               NULL, NULL, NULL);
      else
        vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
      tree mask_perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
          for (i = 0; i < nunits; ++i)
            sel[i] = i | gather_off_nunits;
          mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
        }
      else
        gcc_unreachable ();

      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          if (mask_perm_mask && (j & 1))
            mask_op = permute_vec_elements (mask_op, mask_op,
                                            mask_perm_mask, stmt, gsi);
          else
            {
              if (j == 0)
                vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              else
                {
                  vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
                  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
                }

              mask_op = vec_mask;
              if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
                {
                  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
                              == TYPE_VECTOR_SUBPARTS (masktype));
                  var = vect_get_new_ssa_name (masktype, vect_simple_var);
                  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
                  new_stmt
                    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  mask_op = var;
                }
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
                                 scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
      return true;
    }
  else if (is_store)
    {
      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              tree rhs = gimple_call_arg (stmt, 3);
              vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              /* We should have caught mismatched types earlier.  */
              gcc_assert (useless_type_conversion_p (vectype,
                                                     TREE_TYPE (vec_rhs)));
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
              vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
                                          ptr, vec_mask, vec_rhs);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else
    {
      tree vec_mask = NULL_TREE;
      prev_stmt_info = NULL;
      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
      for (i = 0; i < ncopies; i++)
        {
          unsigned align, misalign;

          if (i == 0)
            {
              vec_mask = vect_get_vec_def_for_operand (mask, stmt);
              dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
                                                      NULL_TREE, &dummy, gsi,
                                                      &ptr_incr, false, &inv_p);
              gcc_assert (!inv_p);
            }
          else
            {
              vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
              vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                             TYPE_SIZE_UNIT (vectype));
            }

          align = TYPE_ALIGN_UNIT (vectype);
          if (aligned_access_p (dr))
            misalign = 0;
          else if (DR_MISALIGNMENT (dr) == -1)
            {
              align = TYPE_ALIGN_UNIT (elem_type);
              misalign = 0;
            }
          else
            misalign = DR_MISALIGNMENT (dr);
          set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                  misalign);
          tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
                                    misalign ? misalign & -misalign : align);
          new_stmt
            = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
                                          ptr, vec_mask);
          gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (i == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  if (!is_store)
    {
      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
         from the IL.  */
      if (STMT_VINFO_RELATED_STMT (stmt_info))
        {
          stmt = STMT_VINFO_RELATED_STMT (stmt_info);
          stmt_info = vinfo_for_stmt (stmt);
        }
      tree lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi_replace (gsi, new_stmt, true);
    }

  return true;
}
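
/* Illustrative sketch (not part of the original source): assuming a V4SF
   access with 16-byte alignment, each copy generated above appears in
   GIMPLE dumps roughly as

     vect__1.7 = MASK_LOAD (dataref_ptr, 16B, vec_mask);
     MASK_STORE (dataref_ptr, 16B, vec_mask, vect_rhs.9);

   where the second argument is the alignment encoded by the
   build_int_cst calls above.  The type, alignment and SSA names are
   assumptions for the example.  */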

/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
                          tree_code *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  tree_code code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                        &code, &multi_step_cvt,
                                        &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
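
/* Illustrative sketch (not part of the original source): with 128-bit
   vectors, narrowing V4SI inputs to V8HI outputs qualifies as a single
   step, e.g.

     tree_code pack_code;
     if (simple_integer_narrowing (v8hi_type, v4si_type, &pack_code))
       gcc_assert (pack_code == VEC_PACK_TRUNC_EXPR);

   so two V4SI half-results can be combined into one V8HI vector.  The
   concrete modes, variable names and the resulting pack code are
   assumptions for the example.  */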

/* Function vectorizable_call.

   Check if GS performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node)
{
  gcall *stmt;
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree fndecl, new_temp, rhs_type;
  gimple *def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is GS a vectorizable call?  */
  stmt = dyn_cast <gcall *> (gs);
  if (!stmt)
    return false;

  if (gimple_call_internal_p (stmt)
      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
                                         slp_node);

  if (gimple_call_lhs (stmt) == NULL_TREE
      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      nargs = 0;
      rhs_type = unsigned_type_node;
    }

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.\n");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.\n");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* We only handle functions that do not read or clobber memory.  */
  if (gimple_vuse (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function reads from or writes to memory.\n");
      return false;
    }

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = NULL_TREE;
  internal_fn ifn = IFN_LAST;
  combined_fn cfn = gimple_call_combined_fn (stmt);
  tree callee = gimple_call_fndecl (stmt);

  /* First try using an internal function.  */
  tree_code convert_code = ERROR_MARK;
  if (cfn != CFN_LAST
      && (modifier == NONE
          || (modifier == NARROW
              && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
                                          vectype_in);

  /* If that fails, try asking for a target-specific built-in function.  */
  if (ifn == IFN_LAST)
    {
      if (cfn != CFN_LAST)
        fndecl = targetm.vectorize.builtin_vectorized_function
          (cfn, vectype_out, vectype_in);
      else
        fndecl = targetm.vectorize.builtin_md_vectorized_function
          (callee, vectype_out, vectype_in);
    }

  if (ifn == IFN_LAST && !fndecl)
    {
      if (cfn == CFN_GOMP_SIMD_LANE
          && !slp_node
          && loop_vinfo
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
          && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
          && LOOP_VINFO_LOOP (loop_vinfo)->simduid
             == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
        {
          /* We can handle IFN_GOMP_SIMD_LANE by returning a
             { 0, 1, 2, ... vf - 1 } vector.  */
          gcc_assert (nargs == 0);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "function is not vectorizable.\n");
          return false;
        }
    }

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW && ifn == IFN_LAST)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
                         "\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
        add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
                       vec_promote_demote, stmt_info, 0, vect_body);

      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  if (modifier == NONE || ifn != IFN_LAST)
    {
      tree prev_res = NULL_TREE;
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  if (modifier == NARROW)
                    {
                      tree half_res = make_ssa_name (vectype_in);
                      new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      gimple_call_set_lhs (new_stmt, half_res);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      if ((i & 1) == 0)
                        {
                          prev_res = half_res;
                          continue;
                        }
                      new_temp = make_ssa_name (vec_dest);
                      new_stmt = gimple_build_assign (new_temp, convert_code,
                                                      prev_res, half_res);
                    }
                  else
                    {
                      if (ifn != IFN_LAST)
                        new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                      else
                        new_stmt = gimple_build_call_vec (fndecl, vargs);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          if (gimple_call_internal_p (stmt)
              && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
            {
              tree *v = XALLOCAVEC (tree, nunits_out);
              int k;
              for (k = 0; k < nunits_out; ++k)
                v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
              tree cst = build_vector (vectype_out, v);
              tree new_var
                = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
              gimple *init_stmt = gimple_build_assign (new_var, cst);
              vect_init_vector_1 (stmt, init_stmt, NULL);
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, new_var);
            }
          else if (modifier == NARROW)
            {
              tree half_res = make_ssa_name (vectype_in);
              new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (new_stmt, half_res);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if ((j & 1) == 0)
                {
                  prev_res = half_res;
                  continue;
                }
              new_temp = make_ssa_name (vec_dest);
              new_stmt = gimple_build_assign (new_temp, convert_code,
                                              prev_res, half_res);
            }
          else
            {
              if (ifn != IFN_LAST)
                new_stmt = gimple_build_call_internal_vec (ifn, vargs);
              else
                new_stmt = gimple_build_call_vec (fndecl, vargs);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == (modifier == NARROW ? 1 : 0))
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }
  else if (modifier == NARROW)
    {
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              auto_vec<vec<tree> > vec_defs (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  if (ifn != IFN_LAST)
                    new_stmt = gimple_build_call_internal_vec (ifn, vargs);
                  else
                    new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }
  else
    /* No current target implements this case.  */
    return false;

  vargs.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);

  if (gimple_call_internal_p (stmt)
      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
    {
      /* Replace uses of the lhs of GOMP_SIMD_LANE call outside the loop
         with vf - 1 rather than 0, that is the last iteration of the
         vectorized loop.  */
      imm_use_iterator iter;
      use_operand_p use_p;
      gimple *use_stmt;
      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
        {
          basic_block use_bb = gimple_bb (use_stmt);
          if (use_bb
              && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
            {
              FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
                                               ncopies * nunits_out - 1));
              update_stmt (use_stmt);
            }
        }
    }

  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);

  return true;
}
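
/* Illustrative sketch (not part of the original source): after the tail
   above runs, a scalar call such as

     z_5 = sqrtf (x_4);

   is left in the IL as the harmless

     z_5 = 0.0;

   because z_5's real definition is now the vectorized stmt recorded in
   STMT_VINFO_VEC_STMT; dce later removes the placeholder.  The sqrtf
   call and SSA names are assumptions for the example.  */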


struct simd_call_arg_info
{
  tree vectype;
  tree op;
  enum vect_def_type dt;
  HOST_WIDE_INT linear_step;
  unsigned int align;
  bool simd_lane_linear;
};

/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */

static void
vect_simd_lane_linear (tree op, struct loop *loop,
                       struct simd_call_arg_info *arginfo)
{
  gimple *def_stmt = SSA_NAME_DEF_STMT (op);

  if (!is_gimple_assign (def_stmt)
      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
      || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
    return;

  tree base = gimple_assign_rhs1 (def_stmt);
  HOST_WIDE_INT linear_step = 0;
  tree v = gimple_assign_rhs2 (def_stmt);
  while (TREE_CODE (v) == SSA_NAME)
    {
      tree t;
      def_stmt = SSA_NAME_DEF_STMT (v);
      if (is_gimple_assign (def_stmt))
        switch (gimple_assign_rhs_code (def_stmt))
          {
          case PLUS_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || TREE_CODE (t) != INTEGER_CST)
              return;
            base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          case MULT_EXPR:
            t = gimple_assign_rhs2 (def_stmt);
            if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
              return;
            linear_step = tree_to_shwi (t);
            v = gimple_assign_rhs1 (def_stmt);
            continue;
          CASE_CONVERT:
            t = gimple_assign_rhs1 (def_stmt);
            if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
                || (TYPE_PRECISION (TREE_TYPE (v))
                    < TYPE_PRECISION (TREE_TYPE (t))))
              return;
            if (!linear_step)
              linear_step = 1;
            v = t;
            continue;
          default:
            return;
          }
      else if (is_gimple_call (def_stmt)
               && gimple_call_internal_p (def_stmt)
               && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
               && loop->simduid
               && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
               && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
                   == loop->simduid))
        {
          if (!linear_step)
            linear_step = 1;
          arginfo->linear_step = linear_step;
          arginfo->op = base;
          arginfo->simd_lane_linear = true;
          return;
        }
    }
}
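
/* Illustrative sketch (not part of the original source): the walk above
   recognizes address computations of the shape

     _1 = GOMP_SIMD_LANE (simduid.0_6);
     _2 = _1 * 4;
     p_3 = &array + _2;

   i.e. an invariant base plus a multiple of the simd lane, and records
   the base and linear_step 4 in *ARGINFO.  The step of 4 and the names
   are assumptions for the example.  */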

/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
                              gimple **vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp;
  gimple *def_stmt;
  gimple *new_stmt = NULL;
  int ncopies, j;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  struct cgraph_node *node = cgraph_node::get (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;
      thisarginfo.simd_lane_linear = false;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
                               &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          arginfo.release ();
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* For linear arguments, the analyze phase should have saved
         the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
      if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
          && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
        {
          gcc_assert (vec_stmt);
          thisarginfo.linear_step
            = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
          thisarginfo.op
            = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
          thisarginfo.simd_lane_linear
            = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
               == boolean_true_node);
          /* If loop has been peeled for alignment, we need to adjust it.  */
          tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
          tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
          if (n1 != n2 && !thisarginfo.simd_lane_linear)
            {
              tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
              tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
              tree opt = TREE_TYPE (thisarginfo.op);
              bias = fold_convert (TREE_TYPE (step), bias);
              bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
              thisarginfo.op
                = fold_build2 (POINTER_TYPE_P (opt)
                               ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
                               thisarginfo.op, bias);
            }
        }
      else if (!vec_stmt
               && thisarginfo.dt != vect_constant_def
               && thisarginfo.dt != vect_external_def
               && loop_vinfo
               && TREE_CODE (op) == SSA_NAME
               && simple_iv (loop, loop_containing_stmt (stmt), op,
                             &iv, false)
               && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
      /* Addresses of array elements indexed by GOMP_SIMD_LANE are
         linear too.  */
      if (POINTER_TYPE_P (TREE_TYPE (op))
          && !thisarginfo.linear_step
          && !vec_stmt
          && thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && loop_vinfo
          && !slp_node
          && TREE_CODE (op) == SSA_NAME)
        vect_simd_lane_linear (op, loop, &thisarginfo);

      arginfo.quick_push (thisarginfo);
    }

  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
                         TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
              case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }

  if (bestn == NULL)
    {
      arginfo.release ();
      return false;
    }

  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          {
            arginfo.release ();
            return false;
          }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
      for (i = 0; i < nargs; i++)
        if (bestn->simdclone->args[i].arg_type
            == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
          {
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
                                                                      + 1);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
            tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
                       ? size_type_node : TREE_TYPE (arginfo[i].op);
            tree ls = build_int_cst (lst, arginfo[i].linear_step);
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
            tree sll = arginfo[i].simd_lane_linear
                       ? boolean_true_node : boolean_false_node;
            STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
          }
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }
3087
3088 /** Transform. **/
3089
3090 if (dump_enabled_p ())
3091 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3092
3093 /* Handle def. */
3094 scalar_dest = gimple_call_lhs (stmt);
3095 vec_dest = NULL_TREE;
3096 rtype = NULL_TREE;
3097 ratype = NULL_TREE;
3098 if (scalar_dest)
3099 {
3100 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3101 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3102 if (TREE_CODE (rtype) == ARRAY_TYPE)
3103 {
3104 ratype = rtype;
3105 rtype = TREE_TYPE (ratype);
3106 }
3107 }
3108
3109 prev_stmt_info = NULL;
3110 for (j = 0; j < ncopies; ++j)
3111 {
3112 /* Build argument list for the vectorized call. */
3113 if (j == 0)
3114 vargs.create (nargs);
3115 else
3116 vargs.truncate (0);
3117
3118 for (i = 0; i < nargs; i++)
3119 {
3120 unsigned int k, l, m, o;
3121 tree atype;
3122 op = gimple_call_arg (stmt, i);
3123 switch (bestn->simdclone->args[i].arg_type)
3124 {
3125 case SIMD_CLONE_ARG_TYPE_VECTOR:
3126 atype = bestn->simdclone->args[i].vector_type;
3127 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3128 for (m = j * o; m < (j + 1) * o; m++)
3129 {
3130 if (TYPE_VECTOR_SUBPARTS (atype)
3131 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3132 {
3133 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3134 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3135 / TYPE_VECTOR_SUBPARTS (atype));
3136 gcc_assert ((k & (k - 1)) == 0);
3137 if (m == 0)
3138 vec_oprnd0
81c40241 3139 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3140 else
3141 {
3142 vec_oprnd0 = arginfo[i].op;
3143 if ((m & (k - 1)) == 0)
3144 vec_oprnd0
3145 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3146 vec_oprnd0);
3147 }
3148 arginfo[i].op = vec_oprnd0;
3149 vec_oprnd0
3150 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3151 size_int (prec),
3152 bitsize_int ((m & (k - 1)) * prec));
3153 new_stmt
b731b390 3154 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3155 vec_oprnd0);
3156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3157 vargs.safe_push (gimple_assign_lhs (new_stmt));
3158 }
3159 else
3160 {
3161 k = (TYPE_VECTOR_SUBPARTS (atype)
3162 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3163 gcc_assert ((k & (k - 1)) == 0);
3164 vec<constructor_elt, va_gc> *ctor_elts;
3165 if (k != 1)
3166 vec_alloc (ctor_elts, k);
3167 else
3168 ctor_elts = NULL;
3169 for (l = 0; l < k; l++)
3170 {
3171 if (m == 0 && l == 0)
3172 vec_oprnd0
81c40241 3173 = vect_get_vec_def_for_operand (op, stmt);
0136f8f0
AH
3174 else
3175 vec_oprnd0
3176 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3177 arginfo[i].op);
3178 arginfo[i].op = vec_oprnd0;
3179 if (k == 1)
3180 break;
3181 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3182 vec_oprnd0);
3183 }
3184 if (k == 1)
3185 vargs.safe_push (vec_oprnd0);
3186 else
3187 {
3188 vec_oprnd0 = build_constructor (atype, ctor_elts);
3189 new_stmt
b731b390 3190 = gimple_build_assign (make_ssa_name (atype),
0136f8f0
AH
3191 vec_oprnd0);
3192 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3193 vargs.safe_push (gimple_assign_lhs (new_stmt));
3194 }
3195 }
3196 }
3197 break;
3198 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3199 vargs.safe_push (op);
3200 break;
3201 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3202 if (j == 0)
3203 {
3204 gimple_seq stmts;
3205 arginfo[i].op
3206 = force_gimple_operand (arginfo[i].op, &stmts, true,
3207 NULL_TREE);
3208 if (stmts != NULL)
3209 {
3210 basic_block new_bb;
3211 edge pe = loop_preheader_edge (loop);
3212 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3213 gcc_assert (!new_bb);
3214 }
17b658af
JJ
3215 if (arginfo[i].simd_lane_linear)
3216 {
3217 vargs.safe_push (arginfo[i].op);
3218 break;
3219 }
3220 tree phi_res = copy_ssa_name (op);
3221 gphi *new_phi = create_phi_node (phi_res, loop->header);
3222 set_vinfo_for_stmt (new_phi,
3223 new_stmt_vec_info (new_phi, loop_vinfo));
3224 add_phi_arg (new_phi, arginfo[i].op,
3225 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3226 enum tree_code code
3227 = POINTER_TYPE_P (TREE_TYPE (op))
3228 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3229 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3230 ? sizetype : TREE_TYPE (op);
3231 widest_int cst
3232 = wi::mul (bestn->simdclone->args[i].linear_step,
3233 ncopies * nunits);
3234 tree tcst = wide_int_to_tree (type, cst);
3235 tree phi_arg = copy_ssa_name (op);
3236 new_stmt
3237 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3238 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3239 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3240 set_vinfo_for_stmt (new_stmt,
3241 new_stmt_vec_info (new_stmt, loop_vinfo));
3242 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3243 UNKNOWN_LOCATION);
3244 arginfo[i].op = phi_res;
3245 vargs.safe_push (phi_res);
3246 }
3247 else
3248 {
3249 enum tree_code code
3250 = POINTER_TYPE_P (TREE_TYPE (op))
3251 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3252 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3253 ? sizetype : TREE_TYPE (op);
3254 widest_int cst
3255 = wi::mul (bestn->simdclone->args[i].linear_step,
3256 j * nunits);
3257 tree tcst = wide_int_to_tree (type, cst);
3258 new_temp = make_ssa_name (TREE_TYPE (op));
3259 new_stmt = gimple_build_assign (new_temp, code,
3260 arginfo[i].op, tcst);
3261 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3262 vargs.safe_push (new_temp);
3263 }
3264 break;
3265 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3266 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3267 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3268 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3269 default:
3270 gcc_unreachable ();
3271 }
3272 }
3273
3274 new_stmt = gimple_build_call_vec (fndecl, vargs);
3275 if (vec_dest)
3276 {
3277 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3278 if (ratype)
3279 new_temp = create_tmp_var (ratype);
3280 else if (TYPE_VECTOR_SUBPARTS (vectype)
3281 == TYPE_VECTOR_SUBPARTS (rtype))
3282 new_temp = make_ssa_name (vec_dest, new_stmt);
3283 else
3284 new_temp = make_ssa_name (rtype, new_stmt);
3285 gimple_call_set_lhs (new_stmt, new_temp);
3286 }
3287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3288
3289 if (vec_dest)
3290 {
3291 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3292 {
3293 unsigned int k, l;
3294 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3295 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3296 gcc_assert ((k & (k - 1)) == 0);
3297 for (l = 0; l < k; l++)
3298 {
3299 tree t;
3300 if (ratype)
3301 {
3302 t = build_fold_addr_expr (new_temp);
3303 t = build2 (MEM_REF, vectype, t,
3304 build_int_cst (TREE_TYPE (t),
3305 l * prec / BITS_PER_UNIT));
3306 }
3307 else
3308 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3309 size_int (prec), bitsize_int (l * prec));
3310 new_stmt
3311 = gimple_build_assign (make_ssa_name (vectype), t);
3312 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3313 if (j == 0 && l == 0)
3314 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3315 else
3316 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3317
3318 prev_stmt_info = vinfo_for_stmt (new_stmt);
3319 }
3320
3321 if (ratype)
3322 {
3323 tree clobber = build_constructor (ratype, NULL);
3324 TREE_THIS_VOLATILE (clobber) = 1;
3325 new_stmt = gimple_build_assign (new_temp, clobber);
3326 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3327 }
3328 continue;
3329 }
3330 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3331 {
3332 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3333 / TYPE_VECTOR_SUBPARTS (rtype));
3334 gcc_assert ((k & (k - 1)) == 0);
3335 if ((j & (k - 1)) == 0)
3336 vec_alloc (ret_ctor_elts, k);
3337 if (ratype)
3338 {
3339 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3340 for (m = 0; m < o; m++)
3341 {
3342 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3343 size_int (m), NULL_TREE, NULL_TREE);
3344 new_stmt
3345 = gimple_build_assign (make_ssa_name (rtype), tem);
3346 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3347 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3348 gimple_assign_lhs (new_stmt));
3349 }
3350 tree clobber = build_constructor (ratype, NULL);
3351 TREE_THIS_VOLATILE (clobber) = 1;
3352 new_stmt = gimple_build_assign (new_temp, clobber);
3353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3354 }
3355 else
3356 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3357 if ((j & (k - 1)) != k - 1)
3358 continue;
3359 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3360 new_stmt
3361 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3362 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3363
3364 if ((unsigned) j == k - 1)
3365 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3366 else
3367 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3368
3369 prev_stmt_info = vinfo_for_stmt (new_stmt);
3370 continue;
3371 }
3372 else if (ratype)
3373 {
3374 tree t = build_fold_addr_expr (new_temp);
3375 t = build2 (MEM_REF, vectype, t,
3376 build_int_cst (TREE_TYPE (t), 0));
3377 new_stmt
3378 = gimple_build_assign (make_ssa_name (vec_dest), t);
3379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3380 tree clobber = build_constructor (ratype, NULL);
3381 TREE_THIS_VOLATILE (clobber) = 1;
3382 vect_finish_stmt_generation (stmt,
3383 gimple_build_assign (new_temp,
3384 clobber), gsi);
3385 }
3386 }
3387
3388 if (j == 0)
3389 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3390 else
3391 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3392
3393 prev_stmt_info = vinfo_for_stmt (new_stmt);
3394 }
3395
3396 vargs.release ();
3397
3398 /* The call in STMT might prevent it from being removed by DCE.
3399 We cannot, however, remove it here, due to the way the SSA name
3400 it defines is mapped to the new definition. So just replace the
3401 rhs of the statement with something harmless. */
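 /* Illustrative note (not in the original source): e.g. an original
 "x_1 = foo.simdclone.0 (a_2);" is rewritten to "x_1 = 0;" here,
 keeping the SSA definition alive until DCE can clean it up. */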
3402
3403 if (slp_node)
3404 return true;
3405
3406 if (scalar_dest)
3407 {
3408 type = TREE_TYPE (scalar_dest);
3409 if (is_pattern_stmt_p (stmt_info))
3410 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3411 else
3412 lhs = gimple_call_lhs (stmt);
3413 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3414 }
3415 else
3416 new_stmt = gimple_build_nop ();
3417 set_vinfo_for_stmt (new_stmt, stmt_info);
3418 set_vinfo_for_stmt (stmt, NULL);
3419 STMT_VINFO_STMT (stmt_info) = new_stmt;
3420 gsi_replace (gsi, new_stmt, true);
3421 unlink_stmt_vdef (stmt);
3422
3423 return true;
3424}
3425
3426
3427/* Function vect_gen_widened_results_half
3428
3429 Create a vector stmt whose code, type, number of arguments, and result
3430 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3431 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3432 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3433 needs to be created (DECL is a function-decl of a target-builtin).
3434 STMT is the original scalar stmt that we are vectorizing. */
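/* Illustrative sketch (not in the original source): for a widening
 multiply of eight shorts to eight ints on a 128-bit target, the
 result is produced in two halves,

 vect_lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;
 vect_hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;

 and each call of this function emits one such half. */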
3435
3436static gimple *
3437vect_gen_widened_results_half (enum tree_code code,
3438 tree decl,
3439 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3440 tree vec_dest, gimple_stmt_iterator *gsi,
3441 gimple *stmt)
3442{
3443 gimple *new_stmt;
3444 tree new_temp;
3445
3446 /* Generate half of the widened result: */
3447 if (code == CALL_EXPR)
3448 {
3449 /* Target specific support */
3450 if (op_type == binary_op)
3451 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3452 else
3453 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3454 new_temp = make_ssa_name (vec_dest, new_stmt);
3455 gimple_call_set_lhs (new_stmt, new_temp);
3456 }
3457 else
3458 {
3459 /* Generic support */
3460 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3461 if (op_type != binary_op)
3462 vec_oprnd1 = NULL;
3463 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3464 new_temp = make_ssa_name (vec_dest, new_stmt);
3465 gimple_assign_set_lhs (new_stmt, new_temp);
3466 }
3467 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3468
3469 return new_stmt;
3470}
3471
3472
3473/* Get vectorized definitions for loop-based vectorization. For the first
3474 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3475 scalar operand), and for the rest we get a copy with
3476 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3477 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3478 The vectors are collected into VEC_OPRNDS. */
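/* Illustrative sketch (not in the original source): with
 MULTI_STEP_CVT == 1 this collects four defs,

 vx0 = vect_get_vec_def_for_operand (x, stmt);
 vx1 = vect_get_vec_def_for_stmt_copy (dt, vx0);
 vx2 = vect_get_vec_def_for_stmt_copy (dt, vx1);
 vx3 = vect_get_vec_def_for_stmt_copy (dt, vx2);

 i.e. 2 * (MULTI_STEP_CVT + 1) vector operands in VEC_OPRNDS. */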
3479
3480static void
3481vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3482 vec<tree> *vec_oprnds, int multi_step_cvt)
3483{
3484 tree vec_oprnd;
3485
3486 /* Get first vector operand. */
3487 /* All the vector operands except the very first one (that is scalar oprnd)
3488 are stmt copies. */
3489 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3490 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3491 else
3492 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3493
3494 vec_oprnds->quick_push (vec_oprnd);
3495
3496 /* Get second vector operand. */
3497 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3498 vec_oprnds->quick_push (vec_oprnd);
3499
3500 *oprnd = vec_oprnd;
3501
3502 /* For conversion in multiple steps, continue to get operands
3503 recursively. */
3504 if (multi_step_cvt)
3505 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3506}
3507
3508
3509/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3510 For multi-step conversions store the resulting vectors and call the function
3511 recursively. */
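/* Illustrative sketch (not in the original source): demoting four int
 vectors to one char vector combines the operands pairwise,

 tmp0 = VEC_PACK_TRUNC_EXPR <vi0, vi1>; (int -> short)
 tmp1 = VEC_PACK_TRUNC_EXPR <vi2, vi3>;
 vc = VEC_PACK_TRUNC_EXPR <tmp0, tmp1>; (short -> char)

 with the intermediate results fed back through the recursion. */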
3512
3513static void
3514vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3515 int multi_step_cvt, gimple *stmt,
3516 vec<tree> vec_dsts,
3517 gimple_stmt_iterator *gsi,
3518 slp_tree slp_node, enum tree_code code,
3519 stmt_vec_info *prev_stmt_info)
3520{
3521 unsigned int i;
3522 tree vop0, vop1, new_tmp, vec_dest;
3523 gimple *new_stmt;
3524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3525
3526 vec_dest = vec_dsts.pop ();
3527
3528 for (i = 0; i < vec_oprnds->length (); i += 2)
3529 {
3530 /* Create demotion operation. */
3531 vop0 = (*vec_oprnds)[i];
3532 vop1 = (*vec_oprnds)[i + 1];
3533 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3534 new_tmp = make_ssa_name (vec_dest, new_stmt);
3535 gimple_assign_set_lhs (new_stmt, new_tmp);
3536 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3537
3538 if (multi_step_cvt)
3539 /* Store the resulting vector for next recursive call. */
3540 (*vec_oprnds)[i/2] = new_tmp;
3541 else
3542 {
3543 /* This is the last step of the conversion sequence. Store the
3544 vectors in SLP_NODE or in vector info of the scalar statement
3545 (or in STMT_VINFO_RELATED_STMT chain). */
3546 if (slp_node)
3547 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3548 else
3549 {
3550 if (!*prev_stmt_info)
3551 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3552 else
3553 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3554
3555 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3556 }
3557 }
3558 }
3559
3560 /* For multi-step demotion operations we first generate demotion operations
3561 from the source type to the intermediate types, and then combine the
3562 results (stored in VEC_OPRNDS) in demotion operation to the destination
3563 type. */
3564 if (multi_step_cvt)
3565 {
3566 /* At each level of recursion we have half of the operands we had at the
3567 previous level. */
3568 vec_oprnds->truncate ((i+1)/2);
3569 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3570 stmt, vec_dsts, gsi, slp_node,
3571 VEC_PACK_TRUNC_EXPR,
3572 prev_stmt_info);
3573 }
3574
3575 vec_dsts.quick_push (vec_dest);
3576}
3577
3578
3579/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3580 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3581 the resulting vectors and call the function recursively. */
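/* Illustrative sketch (not in the original source): promoting one short
 vector to two int vectors generates both halves per operand,

 vlo = VEC_UNPACK_LO_EXPR <vs>;
 vhi = VEC_UNPACK_HI_EXPR <vs>;

 and both results are pushed back into VEC_OPRNDS0 for the next
 conversion step. */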
3582
3583static void
3584vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3585 vec<tree> *vec_oprnds1,
3586 gimple *stmt, tree vec_dest,
3587 gimple_stmt_iterator *gsi,
3588 enum tree_code code1,
3589 enum tree_code code2, tree decl1,
3590 tree decl2, int op_type)
3591{
3592 int i;
3593 tree vop0, vop1, new_tmp1, new_tmp2;
3594 gimple *new_stmt1, *new_stmt2;
3595 vec<tree> vec_tmp = vNULL;
3596
3597 vec_tmp.create (vec_oprnds0->length () * 2);
3598 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3599 {
3600 if (op_type == binary_op)
3601 vop1 = (*vec_oprnds1)[i];
3602 else
3603 vop1 = NULL_TREE;
3604
3605 /* Generate the two halves of promotion operation. */
3606 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3607 op_type, vec_dest, gsi, stmt);
3608 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3609 op_type, vec_dest, gsi, stmt);
3610 if (is_gimple_call (new_stmt1))
3611 {
3612 new_tmp1 = gimple_call_lhs (new_stmt1);
3613 new_tmp2 = gimple_call_lhs (new_stmt2);
3614 }
3615 else
3616 {
3617 new_tmp1 = gimple_assign_lhs (new_stmt1);
3618 new_tmp2 = gimple_assign_lhs (new_stmt2);
3619 }
3620
3621 /* Store the results for the next step. */
3622 vec_tmp.quick_push (new_tmp1);
3623 vec_tmp.quick_push (new_tmp2);
3624 }
3625
3626 vec_oprnds0->release ();
3627 *vec_oprnds0 = vec_tmp;
3628}
3629
3630
3631/* Check if STMT performs a conversion operation that can be vectorized.
3632 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3633 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3634 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
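/* Illustrative sketch (not in the original source): a short -> int
 conversion of one V8HI operand is a WIDEN case and yields two V4SI
 results via VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, while int -> short
 is a NARROW case that packs two V4SI operands into one V8HI result
 with VEC_PACK_TRUNC_EXPR. */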
3635
3636static bool
3637vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3638 gimple **vec_stmt, slp_tree slp_node)
3639{
3640 tree vec_dest;
3641 tree scalar_dest;
3642 tree op0, op1 = NULL_TREE;
3643 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3644 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3645 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3646 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3647 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3648 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3649 tree new_temp;
3650 gimple *def_stmt;
3651 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3652 gimple *new_stmt = NULL;
3653 stmt_vec_info prev_stmt_info;
3654 int nunits_in;
3655 int nunits_out;
3656 tree vectype_out, vectype_in;
3657 int ncopies, i, j;
3658 tree lhs_type, rhs_type;
3659 enum { NARROW, NONE, WIDEN } modifier;
3660 vec<tree> vec_oprnds0 = vNULL;
3661 vec<tree> vec_oprnds1 = vNULL;
3662 tree vop0;
3663 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3664 vec_info *vinfo = stmt_info->vinfo;
3665 int multi_step_cvt = 0;
3666 vec<tree> vec_dsts = vNULL;
3667 vec<tree> interm_types = vNULL;
3668 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3669 int op_type;
3670 machine_mode rhs_mode;
3671 unsigned short fltsz;
3672
3673 /* Is STMT a vectorizable conversion? */
3674
3675 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3676 return false;
3677
3678 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3679 && ! vec_stmt)
3680 return false;
3681
3682 if (!is_gimple_assign (stmt))
3683 return false;
3684
3685 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3686 return false;
3687
3688 code = gimple_assign_rhs_code (stmt);
3689 if (!CONVERT_EXPR_CODE_P (code)
3690 && code != FIX_TRUNC_EXPR
3691 && code != FLOAT_EXPR
3692 && code != WIDEN_MULT_EXPR
3693 && code != WIDEN_LSHIFT_EXPR)
3694 return false;
3695
3696 op_type = TREE_CODE_LENGTH (code);
3697
3698 /* Check types of lhs and rhs. */
3699 scalar_dest = gimple_assign_lhs (stmt);
3700 lhs_type = TREE_TYPE (scalar_dest);
3701 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3702
3703 op0 = gimple_assign_rhs1 (stmt);
3704 rhs_type = TREE_TYPE (op0);
3705
3706 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3707 && !((INTEGRAL_TYPE_P (lhs_type)
3708 && INTEGRAL_TYPE_P (rhs_type))
3709 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3710 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3711 return false;
3712
3713 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3714 && ((INTEGRAL_TYPE_P (lhs_type)
3715 && (TYPE_PRECISION (lhs_type)
3716 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3717 || (INTEGRAL_TYPE_P (rhs_type)
3718 && (TYPE_PRECISION (rhs_type)
3719 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3720 {
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3723 "type conversion to/from bit-precision unsupported."
3724 "\n");
3725 return false;
3726 }
3727
3728 /* Check the operands of the operation. */
3729 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3730 {
3731 if (dump_enabled_p ())
3732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3733 "use not simple.\n");
3734 return false;
3735 }
3736 if (op_type == binary_op)
3737 {
3738 bool ok;
3739
3740 op1 = gimple_assign_rhs2 (stmt);
3741 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3742 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3743 OP1. */
3744 if (CONSTANT_CLASS_P (op0))
3745 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3746 else
3747 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3748
3749 if (!ok)
3750 {
3751 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3753 "use not simple.\n");
3754 return false;
3755 }
3756 }
3757
3758 /* If op0 is an external or constant defs use a vector type of
3759 the same size as the output vector type. */
ebfd146a 3760 if (!vectype_in)
b690cc0f 3761 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
3762 if (vec_stmt)
3763 gcc_assert (vectype_in);
3764 if (!vectype_in)
3765 {
3766 if (dump_enabled_p ())
3767 {
3768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3769 "no vectype for scalar type ");
3770 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3771 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3772 }
3773
3774 return false;
3775 }
3776
3777 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3778 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3779 {
3780 if (dump_enabled_p ())
3781 {
3782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3783 "can't convert between boolean and non "
3784 "boolean vectors");
3785 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3786 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3787 }
3788
3789 return false;
3790 }
3791
3792 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3793 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3794 if (nunits_in < nunits_out)
3795 modifier = NARROW;
3796 else if (nunits_out == nunits_in)
3797 modifier = NONE;
3798 else
3799 modifier = WIDEN;
3800
3801 /* Multiple types in SLP are handled by creating the appropriate number of
3802 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3803 case of SLP. */
3804 if (slp_node || PURE_SLP_STMT (stmt_info))
3805 ncopies = 1;
3806 else if (modifier == NARROW)
3807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3808 else
3809 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3810
3811 /* Sanity check: make sure that at least one copy of the vectorized stmt
3812 needs to be generated. */
3813 gcc_assert (ncopies >= 1);
3814
3815 /* Supportable by target? */
3816 switch (modifier)
3817 {
3818 case NONE:
3819 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3820 return false;
3821 if (supportable_convert_operation (code, vectype_out, vectype_in,
3822 &decl1, &code1))
3823 break;
3824 /* FALLTHRU */
3825 unsupported:
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3828 "conversion not supported by target.\n");
3829 return false;
3830
3831 case WIDEN:
3832 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3833 &code1, &code2, &multi_step_cvt,
3834 &interm_types))
3835 {
3836 /* Binary widening operation can only be supported directly by the
3837 architecture. */
3838 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3839 break;
3840 }
3841
3842 if (code != FLOAT_EXPR
3843 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3844 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3845 goto unsupported;
3846
3847 rhs_mode = TYPE_MODE (rhs_type);
3848 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3849 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3850 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3851 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3852 {
3853 cvt_type
3854 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3855 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3856 if (cvt_type == NULL_TREE)
3857 goto unsupported;
3858
3859 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3860 {
3861 if (!supportable_convert_operation (code, vectype_out,
3862 cvt_type, &decl1, &codecvt1))
3863 goto unsupported;
3864 }
3865 else if (!supportable_widening_operation (code, stmt, vectype_out,
3866 cvt_type, &codecvt1,
3867 &codecvt2, &multi_step_cvt,
3868 &interm_types))
3869 continue;
3870 else
3871 gcc_assert (multi_step_cvt == 0);
3872
3873 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3874 vectype_in, &code1, &code2,
3875 &multi_step_cvt, &interm_types))
3876 break;
3877 }
3878
3879 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3880 goto unsupported;
3881
3882 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3883 codecvt2 = ERROR_MARK;
3884 else
3885 {
3886 multi_step_cvt++;
3887 interm_types.safe_push (cvt_type);
3888 cvt_type = NULL_TREE;
3889 }
3890 break;
3891
3892 case NARROW:
3893 gcc_assert (op_type == unary_op);
3894 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3895 &code1, &multi_step_cvt,
3896 &interm_types))
3897 break;
3898
3899 if (code != FIX_TRUNC_EXPR
3900 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3901 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3902 goto unsupported;
3903
3904 rhs_mode = TYPE_MODE (rhs_type);
3905 cvt_type
3906 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3907 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3908 if (cvt_type == NULL_TREE)
3909 goto unsupported;
3910 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3911 &decl1, &codecvt1))
3912 goto unsupported;
3913 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3914 &code1, &multi_step_cvt,
3915 &interm_types))
3916 break;
3917 goto unsupported;
3918
3919 default:
3920 gcc_unreachable ();
3921 }
3922
3923 if (!vec_stmt) /* transformation not required. */
3924 {
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_NOTE, vect_location,
3927 "=== vectorizable_conversion ===\n");
3928 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3929 {
3930 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3931 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3932 }
3933 else if (modifier == NARROW)
3934 {
3935 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3936 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3937 }
3938 else
3939 {
3940 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3941 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3942 }
3943 interm_types.release ();
3944 return true;
3945 }
3946
3947 /** Transform. **/
3948 if (dump_enabled_p ())
3949 dump_printf_loc (MSG_NOTE, vect_location,
3950 "transform conversion. ncopies = %d.\n", ncopies);
3951
3952 if (op_type == binary_op)
3953 {
3954 if (CONSTANT_CLASS_P (op0))
3955 op0 = fold_convert (TREE_TYPE (op1), op0);
3956 else if (CONSTANT_CLASS_P (op1))
3957 op1 = fold_convert (TREE_TYPE (op0), op1);
3958 }
3959
3960 /* In case of multi-step conversion, we first generate conversion operations
3961 to the intermediate types, and then from that types to the final one.
3962 We create vector destinations for the intermediate type (TYPES) received
3963 from supportable_*_operation, and store them in the correct order
3964 for future use in vect_create_vectorized_*_stmts (). */
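 /* Illustrative note (not in the original source): the destination for
 the final type is pushed first and the intermediate-type destinations
 after it, so the vec_dsts.pop () calls in the
 vect_create_vectorized_*_stmts helpers hand back the intermediate
 destinations before the final one. */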
3965 vec_dsts.create (multi_step_cvt + 1);
3966 vec_dest = vect_create_destination_var (scalar_dest,
3967 (cvt_type && modifier == WIDEN)
3968 ? cvt_type : vectype_out);
3969 vec_dsts.quick_push (vec_dest);
3970
3971 if (multi_step_cvt)
3972 {
3973 for (i = interm_types.length () - 1;
3974 interm_types.iterate (i, &intermediate_type); i--)
3975 {
3976 vec_dest = vect_create_destination_var (scalar_dest,
3977 intermediate_type);
3978 vec_dsts.quick_push (vec_dest);
3979 }
3980 }
3981
3982 if (cvt_type)
3983 vec_dest = vect_create_destination_var (scalar_dest,
3984 modifier == WIDEN
3985 ? vectype_out : cvt_type);
3986
3987 if (!slp_node)
3988 {
3989 if (modifier == WIDEN)
3990 {
3991 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3992 if (op_type == binary_op)
3993 vec_oprnds1.create (1);
3994 }
3995 else if (modifier == NARROW)
3996 vec_oprnds0.create (
3997 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3998 }
3999 else if (code == WIDEN_LSHIFT_EXPR)
4000 vec_oprnds1.create (slp_node->vec_stmts_size);
4001
4002 last_oprnd = op0;
4003 prev_stmt_info = NULL;
4004 switch (modifier)
4005 {
4006 case NONE:
4007 for (j = 0; j < ncopies; j++)
4008 {
4009 if (j == 0)
4010 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4011 -1);
4012 else
4013 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4014
4015 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4016 {
4017 /* Arguments are ready, create the new vector stmt. */
4018 if (code1 == CALL_EXPR)
4019 {
4020 new_stmt = gimple_build_call (decl1, 1, vop0);
4021 new_temp = make_ssa_name (vec_dest, new_stmt);
4022 gimple_call_set_lhs (new_stmt, new_temp);
4023 }
4024 else
4025 {
4026 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4027 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4028 new_temp = make_ssa_name (vec_dest, new_stmt);
4029 gimple_assign_set_lhs (new_stmt, new_temp);
4030 }
4031
4032 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4033 if (slp_node)
4034 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4035 else
4036 {
4037 if (!prev_stmt_info)
4038 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4039 else
4040 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4041 prev_stmt_info = vinfo_for_stmt (new_stmt);
4042 }
4043 }
4044 }
4045 break;
4046
4047 case WIDEN:
4048 /* In case the vectorization factor (VF) is bigger than the number
4049 of elements that we can fit in a vectype (nunits), we have to
4050 generate more than one vector stmt - i.e - we need to "unroll"
4051 the vector stmt by a factor VF/nunits. */
4052 for (j = 0; j < ncopies; j++)
4053 {
4054 /* Handle uses. */
4055 if (j == 0)
4056 {
4057 if (slp_node)
4058 {
4059 if (code == WIDEN_LSHIFT_EXPR)
4060 {
4061 unsigned int k;
4062
4063 vec_oprnd1 = op1;
4064 /* Store vec_oprnd1 for every vector stmt to be created
4065 for SLP_NODE. We check during the analysis that all
4066 the shift arguments are the same. */
4067 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4068 vec_oprnds1.quick_push (vec_oprnd1);
4069
4070 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4071 slp_node, -1);
4072 }
4073 else
4074 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4075 &vec_oprnds1, slp_node, -1);
4076 }
4077 else
4078 {
4079 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4080 vec_oprnds0.quick_push (vec_oprnd0);
4081 if (op_type == binary_op)
4082 {
4083 if (code == WIDEN_LSHIFT_EXPR)
4084 vec_oprnd1 = op1;
4085 else
4086 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4087 vec_oprnds1.quick_push (vec_oprnd1);
4088 }
4089 }
4090 }
4091 else
4092 {
4093 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4094 vec_oprnds0.truncate (0);
4095 vec_oprnds0.quick_push (vec_oprnd0);
4096 if (op_type == binary_op)
4097 {
4098 if (code == WIDEN_LSHIFT_EXPR)
4099 vec_oprnd1 = op1;
4100 else
4101 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4102 vec_oprnd1);
4103 vec_oprnds1.truncate (0);
4104 vec_oprnds1.quick_push (vec_oprnd1);
4105 }
4106 }
4107
4108 /* Arguments are ready. Create the new vector stmts. */
4109 for (i = multi_step_cvt; i >= 0; i--)
4110 {
4111 tree this_dest = vec_dsts[i];
4112 enum tree_code c1 = code1, c2 = code2;
4113 if (i == 0 && codecvt2 != ERROR_MARK)
4114 {
4115 c1 = codecvt1;
4116 c2 = codecvt2;
4117 }
4118 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4119 &vec_oprnds1,
4120 stmt, this_dest, gsi,
4121 c1, c2, decl1, decl2,
4122 op_type);
4123 }
4124
4125 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4126 {
4127 if (cvt_type)
4128 {
4129 if (codecvt1 == CALL_EXPR)
4130 {
4131 new_stmt = gimple_build_call (decl1, 1, vop0);
4132 new_temp = make_ssa_name (vec_dest, new_stmt);
4133 gimple_call_set_lhs (new_stmt, new_temp);
4134 }
4135 else
4136 {
4137 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4138 new_temp = make_ssa_name (vec_dest);
4139 new_stmt = gimple_build_assign (new_temp, codecvt1,
4140 vop0);
4141 }
4142
4143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4144 }
4145 else
4146 new_stmt = SSA_NAME_DEF_STMT (vop0);
4147
4148 if (slp_node)
4149 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4150 else
4151 {
4152 if (!prev_stmt_info)
4153 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4154 else
4155 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4156 prev_stmt_info = vinfo_for_stmt (new_stmt);
4157 }
4158 }
4159 }
4160
4161 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4162 break;
4163
4164 case NARROW:
4165 /* In case the vectorization factor (VF) is bigger than the number
4166 of elements that we can fit in a vectype (nunits), we have to
4167 generate more than one vector stmt - i.e - we need to "unroll"
4168 the vector stmt by a factor VF/nunits. */
4169 for (j = 0; j < ncopies; j++)
4170 {
4171 /* Handle uses. */
4172 if (slp_node)
4173 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4174 slp_node, -1);
4175 else
4176 {
4177 vec_oprnds0.truncate (0);
4178 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4179 vect_pow2 (multi_step_cvt) - 1);
4180 }
4181
4182 /* Arguments are ready. Create the new vector stmts. */
4183 if (cvt_type)
4184 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4185 {
4186 if (codecvt1 == CALL_EXPR)
4187 {
4188 new_stmt = gimple_build_call (decl1, 1, vop0);
4189 new_temp = make_ssa_name (vec_dest, new_stmt);
4190 gimple_call_set_lhs (new_stmt, new_temp);
4191 }
4192 else
4193 {
4194 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4195 new_temp = make_ssa_name (vec_dest);
4196 new_stmt = gimple_build_assign (new_temp, codecvt1,
4197 vop0);
4198 }
4199
4200 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4201 vec_oprnds0[i] = new_temp;
4202 }
4203
4204 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4205 stmt, vec_dsts, gsi,
4206 slp_node, code1,
4207 &prev_stmt_info);
4208 }
4209
4210 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4211 break;
4212 }
4213
4214 vec_oprnds0.release ();
4215 vec_oprnds1.release ();
4216 vec_dsts.release ();
4217 interm_types.release ();
4218
4219 return true;
4220}
4221
4222
4223/* Function vectorizable_assignment.
4224
4225 Check if STMT performs an assignment (copy) that can be vectorized.
4226 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4227 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4228 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
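/* Illustrative sketch (not in the original source): a copy such as

 a_1 = b_2; or a_1 = (T) b_2;

 where source and destination vectors have the same size and number of
 elements becomes a plain vector move, with a VIEW_CONVERT_EXPR wrapped
 around the operand when only the element interpretation differs. */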
4229
4230static bool
4231vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4232 gimple **vec_stmt, slp_tree slp_node)
4233{
4234 tree vec_dest;
4235 tree scalar_dest;
4236 tree op;
4237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4239 tree new_temp;
4240 gimple *def_stmt;
4241 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4242 int ncopies;
4243 int i, j;
4244 vec<tree> vec_oprnds = vNULL;
4245 tree vop;
4246 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4247 vec_info *vinfo = stmt_info->vinfo;
4248 gimple *new_stmt = NULL;
4249 stmt_vec_info prev_stmt_info = NULL;
4250 enum tree_code code;
4251 tree vectype_in;
ebfd146a 4252
4253 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4254 return false;
4255
4256 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4257 && ! vec_stmt)
4258 return false;
4259
4260 /* Is vectorizable assignment? */
4261 if (!is_gimple_assign (stmt))
4262 return false;
4263
4264 scalar_dest = gimple_assign_lhs (stmt);
4265 if (TREE_CODE (scalar_dest) != SSA_NAME)
4266 return false;
4267
4268 code = gimple_assign_rhs_code (stmt);
4269 if (gimple_assign_single_p (stmt)
4270 || code == PAREN_EXPR
4271 || CONVERT_EXPR_CODE_P (code))
4272 op = gimple_assign_rhs1 (stmt);
4273 else
4274 return false;
4275
4276 if (code == VIEW_CONVERT_EXPR)
4277 op = TREE_OPERAND (op, 0);
4278
4279 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4280 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4281
4282 /* Multiple types in SLP are handled by creating the appropriate number of
4283 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4284 case of SLP. */
4285 if (slp_node || PURE_SLP_STMT (stmt_info))
4286 ncopies = 1;
4287 else
4288 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4289
4290 gcc_assert (ncopies >= 1);
4291
4292 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4293 {
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4296 "use not simple.\n");
4297 return false;
4298 }
4299
4300 /* We can handle NOP_EXPR conversions that do not change the number
4301 of elements or the vector size. */
4302 if ((CONVERT_EXPR_CODE_P (code)
4303 || code == VIEW_CONVERT_EXPR)
4304 && (!vectype_in
4305 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4306 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4307 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4308 return false;
4309
4310 /* We do not handle bit-precision changes. */
4311 if ((CONVERT_EXPR_CODE_P (code)
4312 || code == VIEW_CONVERT_EXPR)
4313 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4314 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4315 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4316 || ((TYPE_PRECISION (TREE_TYPE (op))
4317 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4318 /* But a conversion that does not change the bit-pattern is ok. */
4319 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4320 > TYPE_PRECISION (TREE_TYPE (op)))
4321 && TYPE_UNSIGNED (TREE_TYPE (op)))
4322 /* Conversion between boolean types of different sizes is
4323 a simple assignment in case their vectypes are same
4324 boolean vectors. */
4325 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4326 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4327 {
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4330 "type conversion to/from bit-precision "
4331 "unsupported.\n");
4332 return false;
4333 }
4334
4335 if (!vec_stmt) /* transformation not required. */
4336 {
4337 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_NOTE, vect_location,
4340 "=== vectorizable_assignment ===\n");
4341 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4342 return true;
4343 }
4344
4345 /** Transform. **/
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4348
4349 /* Handle def. */
4350 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4351
4352 /* Handle use. */
4353 for (j = 0; j < ncopies; j++)
4354 {
4355 /* Handle uses. */
4356 if (j == 0)
4357 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4358 else
4359 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4360
4361 /* Arguments are ready. Create the new vector stmt. */
4362 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4363 {
4364 if (CONVERT_EXPR_CODE_P (code)
4365 || code == VIEW_CONVERT_EXPR)
4366 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4367 new_stmt = gimple_build_assign (vec_dest, vop);
4368 new_temp = make_ssa_name (vec_dest, new_stmt);
4369 gimple_assign_set_lhs (new_stmt, new_temp);
4370 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4371 if (slp_node)
4372 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4373 }
4374
4375 if (slp_node)
4376 continue;
4377
4378 if (j == 0)
4379 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4380 else
4381 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4382
4383 prev_stmt_info = vinfo_for_stmt (new_stmt);
4384 }
4385
4386 vec_oprnds.release ();
4387 return true;
4388}
4389
4390
4391/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4392 either as shift by a scalar or by a vector. */
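/* Illustrative usage sketch (not in the original source):

 if (vect_supportable_shift (LSHIFT_EXPR, long_integer_type_node))
 ... the target has a vector-by-scalar or vector-by-vector shift
 optab for this element type ... */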
4393
4394bool
4395vect_supportable_shift (enum tree_code code, tree scalar_type)
4396{
4397
4398 machine_mode vec_mode;
4399 optab optab;
4400 int icode;
4401 tree vectype;
4402
4403 vectype = get_vectype_for_scalar_type (scalar_type);
4404 if (!vectype)
4405 return false;
4406
4407 optab = optab_for_tree_code (code, vectype, optab_scalar);
4408 if (!optab
4409 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4410 {
4411 optab = optab_for_tree_code (code, vectype, optab_vector);
4412 if (!optab
4413 || (optab_handler (optab, TYPE_MODE (vectype))
4414 == CODE_FOR_nothing))
4415 return false;
4416 }
4417
4418 vec_mode = TYPE_MODE (vectype);
4419 icode = (int) optab_handler (optab, vec_mode);
4420 if (icode == CODE_FOR_nothing)
4421 return false;
4422
4423 return true;
4424}
4425
4426
4427/* Function vectorizable_shift.
4428
4429 Check if STMT performs a shift operation that can be vectorized.
4430 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4431 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4432 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
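/* Illustrative sketch (not in the original source): for

 x = y << 3; (invariant shift amount)

 the vector-by-scalar optab path can keep the count as a scalar
 operand, whereas

 x = y << z; (z defined inside the loop)

 requires the vector/vector optab, with the counts held in a vector
 operand. */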
4433
4434static bool
4435vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4436 gimple **vec_stmt, slp_tree slp_node)
4437{
4438 tree vec_dest;
4439 tree scalar_dest;
4440 tree op0, op1 = NULL;
4441 tree vec_oprnd1 = NULL_TREE;
4442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4443 tree vectype;
4444 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4445 enum tree_code code;
4446 machine_mode vec_mode;
4447 tree new_temp;
4448 optab optab;
4449 int icode;
4450 machine_mode optab_op2_mode;
4451 gimple *def_stmt;
4452 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4453 gimple *new_stmt = NULL;
4454 stmt_vec_info prev_stmt_info;
4455 int nunits_in;
4456 int nunits_out;
4457 tree vectype_out;
4458 tree op1_vectype;
4459 int ncopies;
4460 int j, i;
4461 vec<tree> vec_oprnds0 = vNULL;
4462 vec<tree> vec_oprnds1 = vNULL;
4463 tree vop0, vop1;
4464 unsigned int k;
4465 bool scalar_shift_arg = true;
4466 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4467 vec_info *vinfo = stmt_info->vinfo;
4468 int vf;
4469
4470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4471 return false;
4472
4473 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4474 && ! vec_stmt)
4475 return false;
4476
4477 /* Is STMT a vectorizable binary/unary operation? */
4478 if (!is_gimple_assign (stmt))
4479 return false;
4480
4481 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4482 return false;
4483
4484 code = gimple_assign_rhs_code (stmt);
4485
4486 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4487 || code == RROTATE_EXPR))
4488 return false;
4489
4490 scalar_dest = gimple_assign_lhs (stmt);
4491 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4492 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4493 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4494 {
4495 if (dump_enabled_p ())
4496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4497 "bit-precision shifts not supported.\n");
4498 return false;
4499 }
4500
4501 op0 = gimple_assign_rhs1 (stmt);
4502 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4503 {
4504 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4506 "use not simple.\n");
4507 return false;
4508 }
4509 /* If op0 is an external or constant def use a vector type with
4510 the same size as the output vector type. */
4511 if (!vectype)
4512 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4513 if (vec_stmt)
4514 gcc_assert (vectype);
4515 if (!vectype)
4516 {
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4519 "no vectype for scalar type\n");
4520 return false;
4521 }
4522
4523 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4524 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4525 if (nunits_out != nunits_in)
4526 return false;
4527
4528 op1 = gimple_assign_rhs2 (stmt);
4529 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4530 {
4531 if (dump_enabled_p ())
4532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4533 "use not simple.\n");
4534 return false;
4535 }
4536
4537 if (loop_vinfo)
4538 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4539 else
4540 vf = 1;
4541
4542 /* Multiple types in SLP are handled by creating the appropriate number of
4543 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4544 case of SLP. */
437f4a00 4545 if (slp_node || PURE_SLP_STMT (stmt_info))
9dc3f7de
IR
4546 ncopies = 1;
4547 else
4548 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4549
4550 gcc_assert (ncopies >= 1);
4551
4552 /* Determine whether the shift amount is a vector or a scalar. If the
4553 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4554
4555 if ((dt[1] == vect_internal_def
4556 || dt[1] == vect_induction_def)
4557 && !slp_node)
4558 scalar_shift_arg = false;
4559 else if (dt[1] == vect_constant_def
4560 || dt[1] == vect_external_def
4561 || dt[1] == vect_internal_def)
4562 {
4563 /* In SLP, need to check whether the shift count is the same,
4564 in loops if it is a constant or invariant, it is always
4565 a scalar shift. */
4566 if (slp_node)
4567 {
4568 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4569 gimple *slpstmt;
4570
4571 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4572 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4573 scalar_shift_arg = false;
4574 }
4575 }
4576 else
4577 {
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4580 "operand mode requires invariant argument.\n");
4581 return false;
4582 }
4583
4584 /* Vector shifted by vector. */
4585 if (!scalar_shift_arg)
4586 {
4587 optab = optab_for_tree_code (code, vectype, optab_vector);
4588 if (dump_enabled_p ())
4589 dump_printf_loc (MSG_NOTE, vect_location,
4590 "vector/vector shift/rotate found.\n");
4591
4592 if (!op1_vectype)
4593 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4594 if (op1_vectype == NULL_TREE
4595 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4596 {
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4599 "unusable type for last operand in"
4600 " vector/vector shift/rotate.\n");
4601 return false;
4602 }
4603 }
4604 /* See if the machine has a vector shifted by scalar insn and if not
4605 then see if it has a vector shifted by vector insn. */
4606 else
4607 {
4608 optab = optab_for_tree_code (code, vectype, optab_scalar);
4609 if (optab
4610 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4611 {
4612 if (dump_enabled_p ())
4613 dump_printf_loc (MSG_NOTE, vect_location,
4614 "vector/scalar shift/rotate found.\n");
4615 }
4616 else
4617 {
4618 optab = optab_for_tree_code (code, vectype, optab_vector);
4619 if (optab
4620 && (optab_handler (optab, TYPE_MODE (vectype))
4621 != CODE_FOR_nothing))
4622 {
4623 scalar_shift_arg = false;
4624
4625 if (dump_enabled_p ())
4626 dump_printf_loc (MSG_NOTE, vect_location,
4627 "vector/vector shift/rotate found.\n");
4628
4629 /* Unlike the other binary operators, shifts/rotates have
4630 the rhs being int, instead of the same type as the lhs,
4631 so make sure the scalar is the right type if we are
4632 dealing with vectors of long long/long/short/char. */
4633 if (dt[1] == vect_constant_def)
4634 op1 = fold_convert (TREE_TYPE (vectype), op1);
4635 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4636 TREE_TYPE (op1)))
4637 {
4638 if (slp_node
4639 && TYPE_MODE (TREE_TYPE (vectype))
4640 != TYPE_MODE (TREE_TYPE (op1)))
4641 {
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "unusable type for last operand in"
4645 " vector/vector shift/rotate.\n");
4646 return false;
4647 }
4648 if (vec_stmt && !slp_node)
4649 {
4650 op1 = fold_convert (TREE_TYPE (vectype), op1);
4651 op1 = vect_init_vector (stmt, op1,
4652 TREE_TYPE (vectype), NULL);
4653 }
4654 }
4655 }
4656 }
4657 }
4658
4659 /* Supportable by target? */
4660 if (!optab)
4661 {
4662 if (dump_enabled_p ())
4663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4664 "no optab.\n");
4665 return false;
4666 }
4667 vec_mode = TYPE_MODE (vectype);
4668 icode = (int) optab_handler (optab, vec_mode);
4669 if (icode == CODE_FOR_nothing)
4670 {
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "op not supported by target.\n");
4674 /* Check only during analysis. */
4675 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4676 || (vf < vect_min_worthwhile_factor (code)
4677 && !vec_stmt))
4678 return false;
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_NOTE, vect_location,
4681 "proceeding using word mode.\n");
4682 }
4683
4684 /* Worthwhile without SIMD support? Check only during analysis. */
4685 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4686 && vf < vect_min_worthwhile_factor (code)
4687 && !vec_stmt)
4688 {
4689 if (dump_enabled_p ())
4690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4691 "not worthwhile without SIMD support.\n");
4692 return false;
4693 }
4694
4695 if (!vec_stmt) /* transformation not required. */
4696 {
4697 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4698 if (dump_enabled_p ())
4699 dump_printf_loc (MSG_NOTE, vect_location,
4700 "=== vectorizable_shift ===\n");
4701 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4702 return true;
4703 }
4704
4705 /** Transform. **/
4706
4707 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_NOTE, vect_location,
4709 "transform binary/unary operation.\n");
4710
4711 /* Handle def. */
4712 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4713
4714 prev_stmt_info = NULL;
4715 for (j = 0; j < ncopies; j++)
4716 {
4717 /* Handle uses. */
4718 if (j == 0)
4719 {
4720 if (scalar_shift_arg)
4721 {
4722 /* Vector shl and shr insn patterns can be defined with scalar
4723 operand 2 (shift operand). In this case, use constant or loop
4724 invariant op1 directly, without extending it to vector mode
4725 first. */
4726 optab_op2_mode = insn_data[icode].operand[2].mode;
4727 if (!VECTOR_MODE_P (optab_op2_mode))
4728 {
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_NOTE, vect_location,
4731 "operand 1 using scalar mode.\n");
4732 vec_oprnd1 = op1;
4733 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4734 vec_oprnds1.quick_push (vec_oprnd1);
4735 if (slp_node)
4736 {
4737 /* Store vec_oprnd1 for every vector stmt to be created
4738 for SLP_NODE. We check during the analysis that all
4739 the shift arguments are the same.
4740 TODO: Allow different constants for different vector
4741 stmts generated for an SLP instance. */
4742 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4743 vec_oprnds1.quick_push (vec_oprnd1);
4744 }
4745 }
4746 }
4747
4748 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4749 (a special case for certain kind of vector shifts); otherwise,
4750 operand 1 should be of a vector type (the usual case). */
4751 if (vec_oprnd1)
4752 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4753 slp_node, -1);
4754 else
4755 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4756 slp_node, -1);
4757 }
4758 else
4759 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4760
4761 /* Arguments are ready. Create the new vector stmt. */
4762 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4763 {
4764 vop1 = vec_oprnds1[i];
4765 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4766 new_temp = make_ssa_name (vec_dest, new_stmt);
4767 gimple_assign_set_lhs (new_stmt, new_temp);
4768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4769 if (slp_node)
4770 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4771 }
4772
4773 if (slp_node)
4774 continue;
4775
4776 if (j == 0)
4777 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4778 else
4779 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4780 prev_stmt_info = vinfo_for_stmt (new_stmt);
4781 }
4782
9771b263
DN
4783 vec_oprnds0.release ();
4784 vec_oprnds1.release ();
9dc3f7de
IR
4785
4786 return true;
4787}
4788
4789
4790/* Function vectorizable_operation.
4791
4792 Check if STMT performs a binary, unary or ternary operation that can
4793 be vectorized.
4794 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4795 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4796 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
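/* Illustrative sketch (not in the original source): a loop statement

 c[i] = a[i] + b[i];

 becomes NCOPIES copies of

 vc = va + vb;

 with POINTER_PLUS_EXPR rewritten to a plain PLUS_EXPR for the vector
 form beforehand. */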
4797
4798static bool
4799vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4800 gimple **vec_stmt, slp_tree slp_node)
4801{
4802 tree vec_dest;
4803 tree scalar_dest;
4804 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4805 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4806 tree vectype;
4807 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4808 enum tree_code code;
4809 machine_mode vec_mode;
4810 tree new_temp;
4811 int op_type;
4812 optab optab;
4813 bool target_support_p;
4814 gimple *def_stmt;
4815 enum vect_def_type dt[3]
4816 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4817 gimple *new_stmt = NULL;
4818 stmt_vec_info prev_stmt_info;
4819 int nunits_in;
4820 int nunits_out;
4821 tree vectype_out;
4822 int ncopies;
4823 int j, i;
4824 vec<tree> vec_oprnds0 = vNULL;
4825 vec<tree> vec_oprnds1 = vNULL;
4826 vec<tree> vec_oprnds2 = vNULL;
4827 tree vop0, vop1, vop2;
4828 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4829 vec_info *vinfo = stmt_info->vinfo;
4830 int vf;
4831
4832 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4833 return false;
4834
4835 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4836 && ! vec_stmt)
4837 return false;
4838
4839 /* Is STMT a vectorizable binary/unary operation? */
4840 if (!is_gimple_assign (stmt))
4841 return false;
4842
4843 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4844 return false;
4845
ebfd146a
IR
4846 code = gimple_assign_rhs_code (stmt);
4847
4848 /* For pointer addition, we should use the normal plus for
4849 the vector addition. */
4850 if (code == POINTER_PLUS_EXPR)
4851 code = PLUS_EXPR;
4852
4853 /* Support only unary or binary operations. */
4854 op_type = TREE_CODE_LENGTH (code);
16949072 4855 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a 4856 {
73fbfcad 4857 if (dump_enabled_p ())
78c60e3d 4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4859 "num. args = %d (not unary/binary/ternary op).\n",
78c60e3d 4860 op_type);
ebfd146a
IR
4861 return false;
4862 }
4863
b690cc0f
RG
4864 scalar_dest = gimple_assign_lhs (stmt);
4865 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4866
7b7b1813
RG
4867 /* Most operations cannot handle bit-precision types without extra
4868 truncations. */
045c1278
IE
4869 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4870 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4871 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
7b7b1813
RG
4872 /* Exception are bitwise binary operations. */
4873 && code != BIT_IOR_EXPR
4874 && code != BIT_XOR_EXPR
4875 && code != BIT_AND_EXPR)
4876 {
73fbfcad 4877 if (dump_enabled_p ())
78c60e3d 4878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4879 "bit-precision arithmetic not supported.\n");
7b7b1813
RG
4880 return false;
4881 }
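
  /* A sketch of the problem being avoided here: for a 13-bit bit-field
     type, "a + b" must wrap to 13 bits, but a vector add operates on
     the full lanes of the underlying mode, so every operation would
     need an extra truncation.  IOR, XOR and AND never set bits above
     the operands' precision, which is why they are exempt above.  */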

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
         invariant value (don't know whether it is a vector
         of booleans or vector of integers).  We use output
         vectype because operations on boolean don't change
         type.  */
      if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
        {
          if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not supported operation on bool value.\n");
              return false;
            }
          vectype = vectype_out;
        }
      else
        vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);
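
  /* For example (matching the worked example further below): with
     VF = 16 and a V4SI operation (nunits_in = 4), ncopies = 16/4 = 4
     copies of each vector stmt are generated.  */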

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.\n");
          return false;
        }
      target_support_p = (optab_handler (optab, vec_mode)
                          != CODE_FOR_nothing);
    }
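
  /* For illustration (a rough sketch of the semantics): on V8HI lanes,
     MULT_HIGHPART_EXPR yields (a[i] * b[i]) >> 16, the upper half of
     the widened product; can_mult_highpart_p is assumed here to report
     whether the target can do that directly or synthesize it from
     widening multiplies plus a permute.  */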

  if (!target_support_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.\n");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.\n");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===\n");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:  x = memref         -               -
        S2:  z = x + 1          -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more
             details there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (DR_VECT_AUX (dr)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = DR_VECT_AUX (dr)->base_decl;

      if (decl_in_symtab_p (base_decl))
        symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
      else
        {
          DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
          DECL_USER_ALIGN (base_decl) = 1;
        }
      DR_VECT_AUX (dr)->base_misaligned = false;
    }
}
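
/* A sketch of the effect of ensure_base_align: given a file-scope
   "static double a[256];" with the default 8-byte alignment, if the
   loop is vectorized with V2DF (16-byte) accesses the code above
   raises the alignment of "a" to TYPE_ALIGN (vectype) so that aligned
   vector loads and stores to it are valid.  */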

/* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, sel);
}
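
/* For example, for V4SI perm_mask_for_reverse builds the selector
   {3, 2, 1, 0}; if the target cannot permute with that mask
   (can_vec_perm_p fails), NULL_TREE is returned and callers must
   avoid the element-reversing strategy.  */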

/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  gimple *def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int j;
  gimple *next_stmt, *first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  bool negative = false;
  tree offset = NULL_TREE;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec_info *vinfo = stmt_info->vinfo;
  tree aggr_type;
  tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
  tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
  int scatter_scale = 1;
  enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
  enum vect_def_type scatter_src_dt = vect_unknown_def_type;
  gimple *new_stmt;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));

  tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  op = gimple_assign_rhs1 (stmt);

  if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.\n");
      return false;
    }

  if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
    return false;

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (!STMT_VINFO_STRIDED_P (stmt_info))
    {
      negative =
        tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                              ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                              size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }
      if (negative)
        {
          gcc_assert (!grouped_store);
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (dt != vect_constant_def
              && dt != vect_external_def
              && !perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported.\n");
              return false;
            }
        }
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      if (!slp
          && !PURE_SLP_STMT (stmt_info)
          && !STMT_VINFO_STRIDED_P (stmt_info))
        {
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (STMT_VINFO_STRIDED_P (stmt_info)
          && (slp || PURE_SLP_STMT (stmt_info))
          && (group_size > nunits
              || nunits % group_size != 0))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unhandled strided group store\n");
          return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.\n");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
                                                &scatter_off, &scatter_scale);
      gcc_assert (scatter_decl);
      if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
                               &scatter_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "scatter index use not simple.");
          return false;
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                               NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);

      if (nunits == (unsigned int) scatter_off_nunits)
        modifier = NONE;
      else if (nunits == (unsigned int) scatter_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == (unsigned int) scatter_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < (unsigned int) nunits; ++i)
            sel[i] = i | scatter_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();

      rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);

      gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
                           && TREE_CODE (rettype) == VOID_TYPE);

      ptr = fold_convert (ptrtype, scatter_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional scatter stores,
         so mask should be all ones.  */
      mask = build_int_cst (masktype, -1);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, scatter_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (j == 0)
            {
              src = vec_oprnd1
                = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
              op = vec_oprnd0
                = vect_get_vec_def_for_operand (scatter_off, stmt);
            }
          else if (modifier != NONE && (j & 1))
            {
              if (modifier == WIDEN)
                {
                  src = vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
                  op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
                                             stmt, gsi);
                }
              else if (modifier == NARROW)
                {
                  src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
                                              stmt, gsi);
                  op = vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
                }
              else
                gcc_unreachable ();
            }
          else
            {
              src = vec_oprnd1
                = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
              op = vec_oprnd0
                = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
            }

          if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
                          == TYPE_VECTOR_SUBPARTS (srctype));
              var = vect_get_new_ssa_name (srctype, vect_simple_var);
              src = build1 (VIEW_CONVERT_EXPR, srctype, src);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              src = var;
            }

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d\n", ncopies);

  if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;
      tree vec_oprnd;
      unsigned int g;

      gcc_assert (!nested_in_vect_loop_p (loop, stmt));

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (first_dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));

      /* For a store with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             array[i] = ...;

         we generate a new induction variable and new stores from
         the components of the (vectorized) rhs:

           for (j = 0; ; j += VF*stride)
             vectemp = ...;
             tmp1 = vectemp[0];
             array[j] = tmp1;
             tmp2 = vectemp[1];
             array[j + stride] = tmp2;
             ...
         */

      unsigned nstores = nunits;
      tree ltype = elem_type;
      if (slp)
        {
          nstores = nunits / group_size;
          if (group_size < nunits)
            ltype = build_vector_type (elem_type, group_size);
          else
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          group_size = 1;
        }
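
      /* For instance (a sketch): an SLP group of 2 strided stores with
         V4SI (nunits = 4) gives nstores = 2 and a 2-element LTYPE, so
         each vectorized rhs below is split into two vector halves that
         are stored STRIDE apart.  */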

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep),
                                           ncopies * nstores));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
      next_stmt = first_stmt;
      for (g = 0; g < group_size; g++)
        {
          running_off = offvar;
          if (g)
            {
              tree size = TYPE_SIZE_UNIT (ltype);
              tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
                                      size);
              tree newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                          running_off, pos);
              vect_finish_stmt_generation (stmt, incr, gsi);
              running_off = newoff;
            }
          for (j = 0; j < ncopies; j++)
            {
              /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
                 and first_stmt == stmt.  */
              if (j == 0)
                {
                  if (slp)
                    {
                      vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
                                         slp_node, -1);
                      vec_oprnd = vec_oprnds[0];
                    }
                  else
                    {
                      gcc_assert (gimple_assign_single_p (next_stmt));
                      op = gimple_assign_rhs1 (next_stmt);
                      vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
                    }
                }
              else
                {
                  if (slp)
                    vec_oprnd = vec_oprnds[j];
                  else
                    {
                      vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
                      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
                    }
                }

              for (i = 0; i < nstores; i++)
                {
                  tree newref, newoff;
                  gimple *incr, *assign;
                  tree size = TYPE_SIZE (ltype);
                  /* Extract the i'th component.  */
                  tree pos = fold_build2 (MULT_EXPR, bitsizetype,
                                          bitsize_int (i), size);
                  tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
                                           size, pos);

                  elem = force_gimple_operand_gsi (gsi, elem, true,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);

                  newref = build2 (MEM_REF, ltype,
                                   running_off, alias_off);

                  /* And store it to *running_off.  */
                  assign = gimple_build_assign (newref, elem);
                  vect_finish_stmt_generation (stmt, assign, gsi);

                  newoff = copy_ssa_name (running_off, NULL);
                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                              running_off, stride_step);
                  vect_finish_stmt_generation (stmt, incr, gsi);

                  running_off = newoff;
                  if (g == group_size - 1
                      && !slp)
                    {
                      if (j == 0 && i == 0)
                        STMT_VINFO_VEC_STMT (stmt_info)
                            = *vec_stmt = assign;
                      else
                        STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
                      prev_stmt_info = vinfo_for_stmt (assign);
                    }
                }
            }
          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
        }
      return true;
    }

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from the base address (the access
     of the first stmt in the chain - S2 in the above example) when the last
     store stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.
  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next
                 copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, vinfo, &def_stmt, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
                                      dataref_ptr,
                                      dataref_offset
                                      ? dataref_offset
                                      : build_int_cst (reference_alias_ptr_type
                                                       (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  if (DR_VECT_AUX (first_dr)->base_element_aligned)
                    align = TYPE_ALIGN_UNIT (elem_type);
                  else
                    align = get_object_alignment (DR_REF (first_dr))
                            / BITS_PER_UNIT;
                  misalign = 0;
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          align * BITS_PER_UNIT);
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE
                  && TREE_CODE (dataref_ptr) == SSA_NAME)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              if (negative
                  && dt != vect_constant_def
                  && dt != vect_external_def)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  tree perm_dest
                    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
                                                   vectype);
                  tree new_temp = make_ssa_name (perm_dest);

                  /* Generate the permute statement.  */
                  gimple *perm_stmt
                    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
                                           vec_oprnd, perm_mask);
                  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

                  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
                  vec_oprnd = new_temp;
                }

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}

/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_p separately, or use
   vect_gen_perm_mask_checked.  */

tree
vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  mask_elt_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}

/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
   i.e. that the target supports the pattern _for arbitrary input
   vectors_.  */

tree
vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
{
  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
  return vect_gen_perm_mask_any (vectype, sel);
}

/* Given vector variables X and Y, that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple *perm_stmt;

  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}

/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can be moved then),
   otherwise returns false.  */

static bool
hoist_defs_of_uses (gimple *stmt, struct loop *loop)
{
  ssa_op_iter i;
  tree op;
  bool any = false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          /* Make sure we don't need to recurse.  While we could do
             so in simple cases when there are more complex use webs
             we don't have an easy way to preserve stmt order to fulfil
             dependencies within them.  */
          tree op2;
          ssa_op_iter i2;
          if (gimple_code (def_stmt) == GIMPLE_PHI)
            return false;
          FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
            {
              gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
              if (!gimple_nop_p (def_stmt2)
                  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
                return false;
            }
          any = true;
        }
    }

  if (!any)
    return true;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
          gsi_remove (&gsi, false);
          gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
        }
    }

  return true;
}
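
/* A sketch of what hoist_defs_of_uses enables: for a loop-invariant
   load such as

     loop:
       p_1 = &s_2->a;
       x_3 = *p_1;

   the definition "p_1 = &s_2->a" depends only on values defined
   outside the loop, so it is moved to the preheader edge, after which
   the caller may hoist the load itself.  */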

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree elem_type;
  tree new_temp;
  machine_mode mode;
  gimple *new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  int ncopies;
  int i, j, group_size = -1, group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree byte_offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gphi *phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple *first_stmt;
  gimple *first_stmt_for_drptr = NULL;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
  vec_info *vinfo = stmt_info->vinfo;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.\n");
      return false;
    }

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
  if (ncopies > 1
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
          > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot perform implicit CSE when unrolling "
                         "with negative dependence distance\n");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.\n");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop
                  && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

      /* If this is single-element interleaving with an element distance
         that leaves unused vector loads around, punt - we at least create
         very sub-optimal code in that case (and blow up memory,
         see PR65518).  */
      bool force_peeling = false;
      if (first_stmt == stmt
          && !GROUP_NEXT_ELEMENT (stmt_info))
        {
          if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "single-element interleaving not supported "
                                 "for not adjacent vector loads\n");
              return false;
            }

          /* Single-element interleaving requires peeling for gaps.  */
          force_peeling = true;
        }

      /* If there is a gap in the end of the group or the group size cannot
         be made a multiple of the vector element count then we access excess
         elements in the last iteration and thus need to peel that off.  */
      if (loop_vinfo
          && ! STMT_VINFO_STRIDED_P (stmt_info)
          && (force_peeling
              || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
              || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Data access with gaps requires scalar "
                             "epilogue loop\n");
          if (loop->inner)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported\n");
              return false;
            }

          LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
        }
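
      /* Example: a single-element interleaving load like "... = a[2*i]"
         has GROUP_SIZE 2 with one element used; full vectors covering
         a[2*i .. 2*i+3] are loaded, so the last vector iteration may
         read past the final element actually needed, and a peeled
         scalar epilogue handles the tail instead.  */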
6392
b1af7da6
RB
6393 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6394 slp_perm = true;
6395
7b5fc413
RB
6396 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6397 if (!slp
6398 && !PURE_SLP_STMT (stmt_info)
f2e2a985 6399 && !STMT_VINFO_STRIDED_P (stmt_info))
b602d918 6400 {
272c6793
RS
6401 if (vect_load_lanes_supported (vectype, group_size))
6402 load_lanes_p = true;
0d0293ac 6403 else if (!vect_grouped_load_supported (vectype, group_size))
b602d918
RS
6404 return false;
6405 }
f2556b68
RB
6406
6407 /* Invalidate assumptions made by dependence analysis when vectorization
6408 on the unrolled body effectively re-orders stmts. */
6409 if (!PURE_SLP_STMT (stmt_info)
6410 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6411 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6412 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6413 {
6414 if (dump_enabled_p ())
6415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6416 "cannot perform implicit CSE when performing "
6417 "group loads with negative dependence distance\n");
6418 return false;
6419 }

      /* Similarly when the stmt is a load that is both part of an SLP
         instance and a loop vectorized stmt via the same-dr mechanism
         we have to give up.  */
      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
          && (STMT_SLP_TYPE (stmt_info)
              != STMT_SLP_TYPE (vinfo_for_stmt
                                  (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "conflicting SLP types for CSEd load\n");
          return false;
        }
    }


  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      gimple *def_stmt;
      gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
                                               &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
                               &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.\n");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      if ((grouped_load
           && (slp || PURE_SLP_STMT (stmt_info)))
          && (group_size > nunits
              || nunits % group_size != 0))
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unhandled strided group load\n");
          return false;
        }
    }
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.\n");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported"
                                 "\n");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.\n");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported."
                                 "\n");
              return false;
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      /* The SLP costs are calculated during SLP analysis.  */
      if (!PURE_SLP_STMT (stmt_info))
        vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
                              NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d\n", ncopies);

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask_checked (vectype, sel);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
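
      /* Worked example (assumed numbers): for WIDEN with nunits == 4 and
         gather_off_nunits == 8 the selector is { 4, 5, 6, 7, 4, 5, 6, 7 },
         i.e. odd copies permute the upper half of the previous offset
         vector into place.  For NARROW with nunits == 8 and
         gather_off_nunits == 4 it is { 0, 1, 2, 3, 8, 9, 10, 11 },
         merging two gather results into one destination vector.  */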

      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype));
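
      /* The decomposition above assumes the target gather builtin has the
         shape (merge/source, base pointer, index vector, mask, scale); the
         x86 AVX2 gather builtins, for instance, follow this layout.  */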

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (masktype) == INTEGER_TYPE)
        mask = build_int_cst (masktype, -1);
      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        {
          mask = build_int_cst (TREE_TYPE (masktype), -1);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
          mask = build_vector_from_val (masktype, mask);
          mask = vect_init_vector (stmt, mask, masktype, NULL);
        }
      else
        gcc_unreachable ();

      scale = build_int_cst (scaletype, gather_scale);

      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
        merge = build_int_cst (TREE_TYPE (rettype), 0);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = 0;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
          merge = build_real (TREE_TYPE (rettype), r);
        }
      else
        gcc_unreachable ();
      merge = build_vector_from_val (rettype, merge);
      merge = vect_init_vector (stmt, merge, rettype, NULL);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_ssa_name (idxtype, vect_simple_var);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              op = vect_get_new_ssa_name (rettype, vect_simple_var);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDED_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple *incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      if (slp && grouped_load)
        first_dr = STMT_VINFO_DATA_REF
            (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
      else
        first_dr = dr;

      stride_base
        = fold_build_pointer_plus
            (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (DR_OFFSET (first_dr)),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
      stride_step = fold_convert (sizetype, DR_STEP (first_dr));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
       */

      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
                            build_int_cst (TREE_TYPE (stride_step), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));

      stride_step = force_gimple_operand (unshare_expr (stride_step),
                                          &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
      int nloads = nunits;
      tree ltype = TREE_TYPE (vectype);
      auto_vec<tree> dr_chain;
      if (slp)
        {
          nloads = nunits / group_size;
          if (group_size < nunits)
            ltype = build_vector_type (TREE_TYPE (vectype), group_size);
          else
            ltype = vectype;
          ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
          if (slp_perm)
            {
              ncopies = (group_size * vf + nunits - 1) / nunits;
              dr_chain.create (ncopies);
            }
          else
            ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
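
      /* Assumed illustration of the choices above: with nunits == 8 and
         group_size == 2 we emit nloads == 4 loads of a 2-element vector
         type per copy; with slp_perm, group_size == 3, vf == 4 and
         nunits == 4 we need ncopies == (3*4 + 4 - 1) / 4 == 3 whole-group
         copies so the permutation has all of its inputs available.  */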
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          if (nloads > 1)
            {
              vec_alloc (v, nloads);
              for (i = 0; i < nloads; i++)
                {
                  tree newref, newoff;
                  gimple *incr;
                  newref = build2 (MEM_REF, ltype, running_off, alias_off);

                  newref = force_gimple_operand_gsi (gsi, newref, true,
                                                     NULL_TREE, true,
                                                     GSI_SAME_STMT);
                  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
                  newoff = copy_ssa_name (running_off);
                  incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                              running_off, stride_step);
                  vect_finish_stmt_generation (stmt, incr, gsi);

                  running_off = newoff;
                }

              vec_inv = build_constructor (vectype, v);
              new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
              new_stmt = SSA_NAME_DEF_STMT (new_temp);
            }
          else
            {
              new_stmt = gimple_build_assign (make_ssa_name (ltype),
                                              build2 (MEM_REF, ltype,
                                                      running_off, alias_off));
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              tree newoff = copy_ssa_name (running_off);
              gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
                                                  running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          if (slp)
            {
              if (slp_perm)
                dr_chain.quick_push (gimple_assign_lhs (new_stmt));
              else
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      if (slp_perm)
        vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                      slp_node_instance, false);
      return true;
    }

  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      /* For SLP vectorization we directly vectorize a subchain
         without permutation.  */
      if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
      /* For BB vectorization always use the first stmt to base
         the data ref pointer on.  */
      if (bb_vinfo)
        first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ??? But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ??? With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
      group_gap_adj = 0;

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          /* For SLP permutation support we need to load the whole group,
             not only the number of vector stmts the permutation result
             fits in.  */
          if (slp_perm)
            vec_num = (group_size * vf + nunits - 1) / nunits;
          else
            vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          group_gap_adj = vf * group_size - nunits * vec_num;
        }
      else
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap_adj = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information we recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  x2 = &base + 2
        S2:  x0 = &base
        S3:  x1 = &base + 1
        S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

        VS1: vx0 = &base
        VS2: vx1 = &base + vec_size*1
        VS3: vx3 = &base + vec_size*2
        VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
          ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         } */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
          byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
                                    size_one_node);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        {
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr)))
              && (alignment_support_scheme == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else if (first_stmt_for_drptr
                   && first_stmt != first_stmt_for_drptr)
            {
              dataref_ptr
                = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
                                            at_loop, offset, &dummy, gsi,
                                            &ptr_incr, simd_lane_access_p,
                                            &inv_p, byte_offset);
              /* Adjust the pointer by the difference to first_stmt.  */
              data_reference_p ptrdr
                = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
              tree diff = fold_convert (sizetype,
                                        size_binop (MINUS_EXPR,
                                                    DR_INIT (first_dr),
                                                    DR_INIT (ptrdr)));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, diff);
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p,
                                          byte_offset);
        }
      else if (dataref_offset)
        dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                          TYPE_SIZE_UNIT (aggr_type));
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
        dr_chain.create (vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              dr_chain.quick_push (new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_grouped_load_vectors (stmt, dr_chain);
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    unsigned int align, misalign;

                    data_ref
                      = fold_build2 (MEM_REF, vectype, dataref_ptr,
                                     dataref_offset
                                     ? dataref_offset
                                     : build_int_cst (reference_alias_ptr_type
                                                      (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        if (DR_VECT_AUX (first_dr)->base_element_aligned)
                          align = TYPE_ALIGN_UNIT (elem_type);
                        else
                          align = (get_object_alignment (DR_REF (first_dr))
                                   / BITS_PER_UNIT);
                        misalign = 0;
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                align * BITS_PER_UNIT);
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
                    if (dataref_offset == NULL_TREE
                        && TREE_CODE (dataref_ptr) == SSA_NAME)
                      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                              align, misalign);
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;

                    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    if (TREE_CODE (dataref_ptr) == SSA_NAME)
                      ptr = copy_ssa_name (dataref_ptr);
                    else
                      ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
                    new_stmt = gimple_build_assign
                                 (ptr, BIT_AND_EXPR, dataref_ptr,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs,
                                       TYPE_SIZE_UNIT (elem_type));
                    bump = size_binop (MINUS_EXPR, bump, size_one_node);
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign
                                 (NULL_TREE, BIT_AND_EXPR, ptr,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = copy_ssa_name (ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  if (TREE_CODE (dataref_ptr) == SSA_NAME)
                    new_temp = copy_ssa_name (dataref_ptr);
                  else
                    new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
                  new_stmt = gimple_build_assign
                               (new_temp, BIT_AND_EXPR, dataref_ptr,
                                build_int_cst
                                (TREE_TYPE (dataref_ptr),
                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
                                                  msq, lsq, realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gcc_assert (!grouped_load);
                  /* If we have versioned for aliasing or the loop doesn't
                     have any data dependencies that would preclude this,
                     then we are sure this is a loop invariant load and
                     thus we can insert it on the preheader edge.  */
                  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
                      && !nested_in_vect_loop
                      && hoist_defs_of_uses (stmt, loop))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "hoisting out of the vectorized "
                                           "loop: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                        }
                      tree tem = copy_ssa_name (scalar_dest);
                      gsi_insert_on_edge_immediate
                        (loop_preheader_edge (loop),
                         gimple_build_assign (tem,
                                              unshare_expr
                                                (gimple_assign_rhs1 (stmt))));
                      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
                      set_vinfo_for_stmt (new_stmt,
                                          new_stmt_vec_info (new_stmt, vinfo));
                    }
                  else
                    {
                      gimple_stmt_iterator gsi2 = *gsi;
                      gsi_next (&gsi2);
                      new_temp = vect_init_vector (stmt, scalar_dest,
                                                   vectype, &gsi2);
                      new_stmt = SSA_NAME_DEF_STMT (new_temp);
                    }
                }
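
              /* E.g. (an assumed case): a load of loop-invariant  a[0]
                 is turned into a splat by vect_init_vector; when
                 dependence analysis permits, the scalar load itself is
                 first hoisted to the loop preheader edge above.  */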

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap or for excess
             elements loaded for a permuted SLP load.  */
          if (group_gap_adj != 0)
            {
              bool ovf;
              tree bump
                = wide_int_to_tree (sizetype,
                                    wi::smul (TYPE_SIZE_UNIT (elem_type),
                                              group_gap_adj, &ovf));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
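
/* Illustration: a COND such as  _1 < _2  (both SSA names with
   vectorizable definitions) is simple, as is a vector-boolean SSA name
   used directly as a mask; operands that are neither SSA names nor
   INTEGER_CST/REAL_CST/FIXED_CST constants are rejected below.  */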

static bool
vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  /* Mask case.  */
  if (TREE_CODE (cond) == SSA_NAME
      && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
      if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
                               &dt, comp_vectype)
          || !*comp_vectype
          || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
        return false;
      return true;
    }

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
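
/* Sketch of the transform (illustrative SSA names only): the scalar
   statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes, per vector copy,

     vcmp_6 = vec_a_7 < vec_b_8;
     vx_9 = VEC_COND_EXPR <vcmp_6, vec_c_10, vec_d_11>;  */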

bool
vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
                        gimple **vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt, dts[4];
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;
  bool masked = false;

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
    {
      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
        return false;

      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
          && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
               && reduc_def))
        return false;

      /* FORNOW: not yet supported.  */
      if (STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "value used after loop.\n");
          return false;
        }
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
      || !comp_vectype)
    return false;

  gimple *def_stmt;
  if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
                           &vectype1))
    return false;
  if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
                           &vectype2))
    return false;

  if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
    return false;

  if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
    return false;

  masked = !COMPARISON_CLASS_P (cond_expr);
  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);

  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 4> ops;
              auto_vec<vec<tree>, 4> vec_defs;

              if (masked)
                ops.safe_push (cond_expr);
              else
                {
                  ops.safe_push (TREE_OPERAND (cond_expr, 0));
                  ops.safe_push (TREE_OPERAND (cond_expr, 1));
                }
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              if (!masked)
                vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              ops.release ();
              vec_defs.release ();
            }
          else
            {
              gimple *gtemp;
              if (masked)
                {
                  vec_cond_lhs
                    = vect_get_vec_def_for_operand (cond_expr, stmt,
                                                    comp_vectype);
                  vect_is_simple_use (cond_expr, stmt_info->vinfo,
                                      &gtemp, &dts[0]);
                }
              else
                {
                  vec_cond_lhs =
                    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                                  stmt, comp_vectype);
                  vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
                                      loop_vinfo, &gtemp, &dts[0]);

                  vec_cond_rhs =
                    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                                  stmt, comp_vectype);
                  vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
                                      loop_vinfo, &gtemp, &dts[1]);
                }
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt);
                  vect_is_simple_use (then_clause, loop_vinfo,
                                      &gtemp, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt);
                  vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs
            = vect_get_vec_def_for_stmt_copy (dts[0],
                                              vec_oprnds0.pop ());
          if (!masked)
            vec_cond_rhs
              = vect_get_vec_def_for_stmt_copy (dts[1],
                                                vec_oprnds1.pop ());

          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          if (!masked)
            vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          if (masked)
            vec_compare = vec_cond_lhs;
          else
            {
              vec_cond_rhs = vec_oprnds1[i];
              vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                    vec_cond_lhs, vec_cond_rhs);
            }
          new_temp = make_ssa_name (vec_dest);
          new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
                                          vec_compare, vec_then_clause,
                                          vec_else_clause);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}

/* vectorizable_comparison.

   Check if STMT is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
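
/* Illustrative example of the transform (assumed SSA names): the scalar
   statement

     mask_1 = a_2 < b_3;

   where mask_1 has boolean type becomes, per vector copy,

     vmask_4 = vec_a_5 < vec_b_6;

   with vmask_4 of a vector-boolean (mask) type.  */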

bool
vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
                         gimple **vec_stmt, tree reduc_def,
                         slp_tree slp_node)
{
  tree lhs, rhs1, rhs2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned nunits;
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  gimple *def_stmt;
  tree mask_type;
  tree mask;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    return false;

  mask_type = vectype;
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.\n");
      return false;
    }

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE_CLASS (code) != tcc_comparison)
    return false;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
                           &dts[0], &vectype1))
    return false;

  if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
                           &dts[1], &vectype2))
    return false;

  if (vectype1 && vectype2
      && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
    return false;

  vectype = vectype1 ? vectype1 : vectype2;

  /* Invariant comparison.  */
  if (!vectype)
    {
      vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
      if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
        return false;
    }
  else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
      vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
      return expand_vec_cmp_expr_p (vectype, mask_type);
    }

  /* Transform.  */
  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
    }

  /* Handle def.  */
  lhs = gimple_assign_lhs (stmt);
  mask = vect_create_destination_var (lhs, mask_type);

  /* Handle cmp expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gassign *new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              auto_vec<tree, 2> ops;
              auto_vec<vec<tree>, 2> vec_defs;

              ops.safe_push (rhs1);
              ops.safe_push (rhs2);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();
            }
          else
            {
              vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
              vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
            }
        }
      else
        {
          vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
                                                     vec_oprnds0.pop ());
          vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
                                                     vec_oprnds1.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_rhs1);
          vec_oprnds1.quick_push (vec_rhs2);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
        {
          vec_rhs2 = vec_oprnds1[i];

          new_temp = make_ssa_name (mask);
          new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
ebfd146a 7910
8644a673 7911/* Make sure the statement is vectorizable. */
ebfd146a
IR
7912
7913bool
355fe088 7914vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
ebfd146a 7915{
8644a673 7916 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
a70d6342 7917 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 7918 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
ebfd146a 7919 bool ok;
a70d6342 7920 tree scalar_type, vectype;
355fe088 7921 gimple *pattern_stmt;
363477c0 7922 gimple_seq pattern_def_seq;
ebfd146a 7923
73fbfcad 7924 if (dump_enabled_p ())
ebfd146a 7925 {
78c60e3d
SS
7926 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7927 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8644a673 7928 }
ebfd146a 7929
1825a1f3 7930 if (gimple_has_volatile_ops (stmt))
b8698a0f 7931 {
73fbfcad 7932 if (dump_enabled_p ())
78c60e3d 7933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 7934 "not vectorized: stmt has volatile operands\n");
1825a1f3
IR
7935
7936 return false;
7937 }
b8698a0f
L
7938
7939 /* Skip stmts that do not need to be vectorized. In loops this is expected
8644a673
IR
7940 to include:
7941 - the COND_EXPR which is the loop exit condition
7942 - any LABEL_EXPRs in the loop
b8698a0f 7943 - computations that are used only for array indexing or loop control.
8644a673 7944 In basic blocks we only analyze statements that are a part of some SLP
83197f37 7945 instance, therefore, all the statements are relevant.
ebfd146a 7946
d092494c 7947 Pattern statement needs to be analyzed instead of the original statement
83197f37 7948 if the original statement is not relevant. Otherwise, we analyze both
079c527f
JJ
7949 statements. In basic blocks we are called from some SLP instance
7950 traversal, don't analyze pattern stmts instead, the pattern stmts
7951 already will be part of SLP instance. */
83197f37
IR
7952
7953 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
b8698a0f 7954 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8644a673 7955 && !STMT_VINFO_LIVE_P (stmt_info))
ebfd146a 7956 {
9d5e7640 7957 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
83197f37 7958 && pattern_stmt
9d5e7640
IR
7959 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7960 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7961 {
83197f37 7962 /* Analyze PATTERN_STMT instead of the original stmt. */
9d5e7640
IR
7963 stmt = pattern_stmt;
7964 stmt_info = vinfo_for_stmt (pattern_stmt);
73fbfcad 7965 if (dump_enabled_p ())
9d5e7640 7966 {
78c60e3d
SS
7967 dump_printf_loc (MSG_NOTE, vect_location,
7968 "==> examining pattern statement: ");
7969 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9d5e7640
IR
7970 }
7971 }
7972 else
7973 {
73fbfcad 7974 if (dump_enabled_p ())
e645e942 7975 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
ebfd146a 7976
9d5e7640
IR
7977 return true;
7978 }
8644a673 7979 }
83197f37 7980 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
079c527f 7981 && node == NULL
83197f37
IR
7982 && pattern_stmt
7983 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7984 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7985 {
7986 /* Analyze PATTERN_STMT too. */
73fbfcad 7987 if (dump_enabled_p ())
83197f37 7988 {
78c60e3d
SS
7989 dump_printf_loc (MSG_NOTE, vect_location,
7990 "==> examining pattern statement: ");
7991 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
83197f37
IR
7992 }
7993
7994 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7995 return false;
7996 }
ebfd146a 7997
1107f3ae 7998 if (is_pattern_stmt_p (stmt_info)
079c527f 7999 && node == NULL
363477c0 8000 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
1107f3ae 8001 {
363477c0 8002 gimple_stmt_iterator si;
1107f3ae 8003
363477c0
JJ
8004 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8005 {
355fe088 8006 gimple *pattern_def_stmt = gsi_stmt (si);
363477c0
JJ
8007 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8008 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8009 {
8010 /* Analyze def stmt of STMT if it's a pattern stmt. */
73fbfcad 8011 if (dump_enabled_p ())
363477c0 8012 {
78c60e3d
SS
8013 dump_printf_loc (MSG_NOTE, vect_location,
8014 "==> examining pattern def statement: ");
8015 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
363477c0 8016 }
1107f3ae 8017
363477c0
JJ
8018 if (!vect_analyze_stmt (pattern_def_stmt,
8019 need_to_vectorize, node))
8020 return false;
8021 }
8022 }
8023 }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo
                  && (relevance == vect_used_in_outer
                      || relevance == vect_used_in_outer_by_reduction
                      || relevance == vect_used_by_reduction
                      || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                  || (is_gimple_call (stmt)
                      && gimple_call_lhs (stmt) == NULL_TREE));
      *need_to_vectorize = true;
    }

  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "handled only by SLP analysis\n");
      return true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
          || vectorizable_conversion (stmt, NULL, NULL, node)
          || vectorizable_shift (stmt, NULL, NULL, node)
          || vectorizable_operation (stmt, NULL, NULL, node)
          || vectorizable_assignment (stmt, NULL, NULL, node)
          || vectorizable_load (stmt, NULL, NULL, node, NULL)
          || vectorizable_call (stmt, NULL, NULL, node)
          || vectorizable_store (stmt, NULL, NULL, node)
          || vectorizable_reduction (stmt, NULL, NULL, node)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
          || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
              || vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
              || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
    }
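
  /* A note on the flow (a summary sketch): invoked with NULL for the
     vectorized-stmt arguments, as above, the vectorizable_* routines only
     test feasibility and record the chosen STMT_VINFO_TYPE;
     vect_transform_stmt below re-dispatches on that recorded type to carry
     out the actual transformation.  */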

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  return true;
}


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */

bool
vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple *vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
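          /* For instance (a sketch): in an interleaved group storing
               a[2*i] = x;  a[2*i+1] = y;
             transforming the first member only records that a grouped
             store was seen; the vector stores are emitted once the last
             member of the group is transformed.  */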
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case comparison_vec_info_type:
      done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
        is_store = true;
      break;

    case call_simd_clone_vec_info_type:
      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.\n");
          gcc_unreachable ();
        }
    }

  /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
     This would break hybrid SLP vectorization.  */
  if (slp_node)
    gcc_assert (!vec_stmt
                && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
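  /* A sketch of the situation handled here (illustrative only):

       outer-loop {               <- the loop being vectorized
         inner-loop {
           s = ...;               <- STMT, defines S
         }
         s' = exit-phi (s);       <- loop-exit phi of the inner loop
         ... = s';                <- outer-loop use of the DEF
       }

     The vectorized def must be recorded on that exit phi so that the
     outer-loop stmts using it can find it.  */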
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info)
             == vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple *exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.\n");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple *first_stmt)
{
  gimple *next = first_stmt;
  gimple *tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  res->vinfo = vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  STMT_VINFO_NUM_SLP_USES (res) = 0;

  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}


/* Create the global vector of stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the global vector of stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  stmt_vec_info info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT (info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple *stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
          gimple_set_bb (patt_stmt, NULL);
          tree lhs = gimple_get_lhs (patt_stmt);
          if (lhs && TREE_CODE (lhs) == SSA_NAME)
            release_ssa_name (lhs);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                {
                  gimple *seq_stmt = gsi_stmt (si);
                  gimple_set_bb (seq_stmt, NULL);
                  lhs = gimple_get_lhs (seq_stmt);
                  if (lhs && TREE_CODE (lhs) == SSA_NAME)
                    release_ssa_name (lhs);
                  free_stmt_vec_info (seq_stmt);
                }
            }
          free_stmt_vec_info (patt_stmt);
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  machine_mode inner_mode = TYPE_MODE (scalar_type);
  machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}
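
/* For example (a sketch): for a C _Bool, whose mode is QImode but whose
   TYPE_PRECISION is 1, the precision check above substitutes an 8-bit
   unsigned integer type, so the vector is built from full-width
   elements.  */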

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
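
/* A usage sketch (hedged): while current_vector_size is still 0, the first
   successful call picks the target's preferred SIMD mode for the element
   mode and latches current_vector_size to that vector's size in bytes
   (16 on a hypothetical 128-bit target); later calls then look up vector
   modes of that same size, so all vector types chosen for one vectorization
   region agree in size.  */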

/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by the target.  */

tree
get_mask_type_for_scalar_type (tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);

  if (!vectype)
    return NULL;

  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
                                  current_vector_size);
}

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
    return build_same_sized_truth_vector_type (vector_type);

  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
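
/* For example (a sketch): with VECTOR_TYPE a 16-byte vector of int
   (V4SI) and SCALAR_TYPE short, the size-based lookup above asks for a
   16-byte vector of shorts and so yields V8HI, provided the target
   supports such a mode.  */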

/* Function vect_is_simple_use.

   Input:
   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   Output:
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
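
/* For example (an illustrative sketch): in

     for (i = 0; i < n; i++)
       a[i] = b[i] * c;

   the operand C is defined outside the loop and classifies as
   vect_external_def, a constant multiplier would be vect_constant_def,
   and the value loaded from B[i] is defined by a stmt inside the loop
   and classifies as vect_internal_def.  */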

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt)
{
  *def_stmt = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_is_simple_use: operand ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
      dump_printf (MSG_NOTE, "\n");
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not ssa-name.\n");
      return false;
    }

  if (SSA_NAME_IS_DEFAULT_DEF (operand))
    {
      *dt = vect_external_def;
      return true;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
    }

  if (! vect_stmt_in_region_p (vinfo, *def_stmt))
    *dt = vect_external_def;
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
      switch (*dt)
        {
        case vect_uninitialized_def:
          dump_printf (MSG_NOTE, "uninitialized\n");
          break;
        case vect_constant_def:
          dump_printf (MSG_NOTE, "constant\n");
          break;
        case vect_external_def:
          dump_printf (MSG_NOTE, "external\n");
          break;
        case vect_internal_def:
          dump_printf (MSG_NOTE, "internal\n");
          break;
        case vect_induction_def:
          dump_printf (MSG_NOTE, "induction\n");
          break;
        case vect_reduction_def:
          dump_printf (MSG_NOTE, "reduction\n");
          break;
        case vect_double_reduction_def:
          dump_printf (MSG_NOTE, "double reduction\n");
          break;
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
        }
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unsupported pattern.\n");
      return false;
    }

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
    case GIMPLE_ASSIGN:
    case GIMPLE_CALL:
      break;
    default:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unsupported defining stmt:\n");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo,
                    gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
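
/* As a sketch of the multi-step case (hedged): for a char -> int
   conversion on a target whose unpack instructions widen by only one
   step, the caller would get back CODE1/CODE2 =
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, *MULTI_STEP_CVT == 1 and
   INTERM_TYPES == { short }, and would expand the conversion through
   intermediate vectors of shorts.  */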

bool
supportable_widening_operation (enum tree_code code, gimple *stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        {
          /* Elements in a vector with vect_used_by_reduction property cannot
             be reordered if the use chain with this property does not have
             the same operation.  One such example is s += a * b, where
             elements in a and b cannot be reordered.  Here we check if the
             vector defined by STMT is only directly used in the reduction
             statement.  */
          tree lhs = gimple_assign_lhs (stmt);
          use_operand_p dummy;
          gimple *use_stmt;
          stmt_vec_info use_stmt_info = NULL;
          if (single_imm_use (lhs, &dummy, &use_stmt)
              && (use_stmt_info = vinfo_for_stmt (use_stmt))
              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
            return true;
        }
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case DOT_PROD_EXPR:
      c1 = DOT_PROD_EXPR;
      c2 = DOT_PROD_EXPR;
      break;

    case SAD_EXPR:
      c1 = SAD_EXPR;
      c2 = SAD_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    std::swap (c1, c2);

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
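
/* As a sketch (hedged), mirroring the widening case above: for an
   int -> char conversion where the target packs by only one step,
   *CODE1 would be VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT == 1 and
   INTERM_TYPES == { short }; two vectors of ints are first packed into
   one vector of shorts, and two vectors of shorts into one of chars.  */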

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}