/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "insn-codes.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "builtins.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
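/* E.g., when the i-loop of a nest "for (i) for (j) ..." is being
   vectorized as an outer loop, the predicate above holds exactly for
   the stmts of the j-loop body.  */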

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
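/* A caller batching up body costs for later processing might do, as a
   sketch (BODY_COSTS is a hypothetical local):

     stmt_vector_for_cost body_costs = vNULL;
     unsigned estimate = record_stmt_cost (&body_costs, 1, vector_stmt,
					   stmt_info, 0, vect_body);

   whereas passing a NULL cost vector hands the cost straight to the
   target's cost model via add_stmt_cost.  */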

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
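/* E.g., for a load-lanes group accessed through PTR_1 the reference
   built above is an aggregate MEM_REF of the whole vector-array type,
   printed roughly as "MEM[(int *)ptr_1]", while the alias pointer
   type preserves the TBAA information of the original accesses.  */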

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d.\n", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
	{
	  imm_use_iterator imm_iter;
	  use_operand_p use_p;
	  gimple use_stmt;
	  tree lhs;
	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

	  if (is_gimple_assign (stmt))
	    lhs = gimple_assign_lhs (stmt);
	  else
	    lhs = gimple_call_lhs (stmt);

	  /* This use is outside the pattern.  If LHS has other uses that
	     are pattern uses, we should mark the stmt itself, and not the
	     pattern stmt.  */
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	      {
		if (is_gimple_debug (USE_STMT (use_p)))
		  continue;
		use_stmt = USE_STMT (use_p);

		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
		  continue;

		if (vinfo_for_stmt (use_stmt)
		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
		  {
		    found = true;
		    break;
		  }
	      }
	}

      if (!found)
	{
	  /* This is the last stmt in a sequence that was detected as a
	     pattern that can potentially be vectorized.  Don't mark the stmt
	     as relevant/live because it's not going to be vectorized.
	     Instead mark the pattern-stmt that replaces it.  */

	  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "last stmt in pattern. don't mark"
			     " relevant/live.\n");
	  stmt_info = vinfo_for_stmt (pattern_stmt);
	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
	  save_relevant = STMT_VINFO_RELEVANT (stmt_info);
	  save_live_p = STMT_VINFO_LIVE_P (stmt_info);
	  stmt = pattern_stmt;
	}
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
	 != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      /* Debug uses do not make a value live out of the loop.  */
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
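/* E.g., a store "a[i_1] = x_2" has a vdef and is therefore marked
   vect_used_in_scope even though its result has no SSA uses, whereas a
   computation whose only uses are in the loop exit phis is marked live
   rather than relevant.  */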


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    {
      if (is_gimple_call (stmt)
	  && gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_STORE:
	    operand = gimple_call_arg (stmt, 3);
	    if (operand == use)
	      return true;
	    /* FALLTHRU */
	  case IFN_MASK_LOAD:
	    operand = gimple_call_arg (stmt, 2);
	    if (operand == use)
	      return true;
	    break;
	  default:
	    break;
	  }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
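/* E.g., for the store "a[i_1] = x_2" this returns true for the use of
   X_2 (the stored value) and false for the use of I_1, which only
   indexes the array.  */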


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unsupported use in stmt.\n");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		     vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
		      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
			 == vect_double_reduction_def) ?
		     vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
		      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_mark_stmts_to_be_vectorized ===\n");

  auto_vec<gimple, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	   live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	   relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
	{
	case vect_reduction_def:
	  switch (tmp_relevant)
	    {
	    case vect_unused_in_scope:
	      relevant = vect_used_by_reduction;
	      break;

	    case vect_used_by_reduction:
	      if (gimple_code (stmt) == GIMPLE_PHI)
		break;
	      /* fall through */

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of reduction.\n");
	      return false;
	    }

	  live_p = false;
	  break;

	case vect_nested_cycle:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_in_outer_by_reduction
	      && tmp_relevant != vect_used_in_outer)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of nested cycle.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	case vect_double_reduction_def:
	  if (tmp_relevant != vect_unused_in_scope
	      && tmp_relevant != vect_used_by_reduction)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported use of double reduction.\n");

	      return false;
	    }

	  live_p = false;
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (is_gimple_assign (stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    return false;
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
		{
		  op = gimple_op (stmt, i);
		  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	  else if (is_gimple_call (stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (stmt); i++)
		{
		  tree arg = gimple_call_arg (stmt, i);
		  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
		    return false;
		}
	    }
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
	      return false;
	  }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    return false;
	}
    } /* while worklist */

  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
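/* E.g., a vector addition with one loop-invariant operand is costed as
   NCOPIES vector_stmt operations in the loop body plus one vector_stmt
   in the prologue for setting up the invariant vector.  */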


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
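/* E.g., a two-step promotion (PWR == 1) is costed as
   vect_pow2 (1) + vect_pow2 (2) == 2 + 4 vec_promote_demote
   operations, whereas the corresponding two-step demotion counts
   vect_pow2 (0) + vect_pow2 (1) == 1 + 2 of them.  */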

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
	{
	  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_size = 1;
	}
      else
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  group_size = vect_cost_group_size (stmt_info);
	}

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses high and low interleave or shuffle operations for each
	 needed permute.  */
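      /* E.g., for GROUP_SIZE == 4 and NCOPIES == 1 this counts
	 ceil_log2 (4) * 4 == 8 vec_perm operations.  */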
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_store_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations or shuffle operations
	 for each needed permute.  */
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_load_cost: strided group_size = %d .\n",
			 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
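      /* E.g., a V4SI strided load with NCOPIES == 2 is costed as eight
	 scalar_load operations plus two vec_construct operations.  */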
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_load_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
	{
	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

	  if (nested_in_vect_loop_p (loop, stmt))
	    loop = loop->inner;

	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
	  basic_block bb;
	  gimple_stmt_iterator gsi_bb_start;

	  gcc_assert (bb_vinfo);
	  bb = BB_VINFO_BB (bb_vinfo);
	  gsi_bb_start = gsi_after_labels (bb);
	  gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
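/* E.g., calling vect_init_vector with VAL equal to the integer
   constant 3, TYPE a four-element integer vector type and a NULL GSI
   emits roughly "cst_1 = { 3, 3, 3, 3 };" in the loop preheader and
   returns the new SSA name.  */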


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
      dump_printf (MSG_NOTE, "\n");
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "def = ");
	  loc_printed = 1;
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
	  dump_printf (MSG_NOTE, "\n");
	}
      if (def_stmt)
	{
	  if (loc_printed)
	    dump_printf (MSG_NOTE, " def_stmt = ");
	  else
	    dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

	/* Create 'vect_cst_ = {cst,cst,...,cst}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Create vector_cst. nunits = %d\n", nunits);

	return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

	/* Create 'vec_inv = {inv,inv,..,inv}'  */
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");

	return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);

	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	/* Get vectorized pattern statement.  */
	if (!vec_stmt
	    && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
	    && !STMT_VINFO_RELEVANT (def_stmt_info))
	  vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
		       STMT_VINFO_RELATED_STMT (def_stmt_info)));
	gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
	return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

	/* Get the def before the loop.  */
	op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
	return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

	/* Get the def from the vectorized stmt.  */
	def_stmt_info = vinfo_for_stmt (def_stmt);
	vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
	return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
	In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0     VS1.1
                        VS1.1: vx.1 = memref1     VS1.2
                        VS1.2: vx.2 = memref2     VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ... VSnew.1
                        VSnew.1: vz1 = vx.1 + ... VSnew.2
                        VSnew.2: vz2 = vx.2 + ... VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
	To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

	To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

	For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
	vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
	vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
	vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
1611
1612
b0f64919 1613/* Get vectorized definitions for OP0 and OP1.
1614 REDUC_INDEX is the index of the reduction operand in case of reduction,
1615 and -1 otherwise. */
fb85abff 1616
b0f64919 1617void
fb85abff 1618vect_get_vec_defs (tree op0, tree op1, gimple stmt,
f1f41a6c 1619 vec<tree> *vec_oprnds0,
1620 vec<tree> *vec_oprnds1,
b0f64919 1621 slp_tree slp_node, int reduc_index)
fb85abff 1622{
1623 if (slp_node)
b0f64919 1624 {
1625 int nops = (op1 == NULL_TREE) ? 1 : 2;
c2078b80 1626 auto_vec<tree> ops (nops);
1627 auto_vec<vec<tree> > vec_defs (nops);
b0f64919 1628
f1f41a6c 1629 ops.quick_push (op0);
b0f64919 1630 if (op1)
f1f41a6c 1631 ops.quick_push (op1);
b0f64919 1632
1633 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1634
7f7695a7 1635 *vec_oprnds0 = vec_defs[0];
b0f64919 1636 if (op1)
7f7695a7 1637 *vec_oprnds1 = vec_defs[1];
b0f64919 1638 }
fb85abff 1639 else
1640 {
1641 tree vec_oprnd;
1642
f1f41a6c 1643 vec_oprnds0->create (1);
48e1416a 1644 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
f1f41a6c 1645 vec_oprnds0->quick_push (vec_oprnd);
fb85abff 1646
1647 if (op1)
1648 {
f1f41a6c 1649 vec_oprnds1->create (1);
48e1416a 1650 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
f1f41a6c 1651 vec_oprnds1->quick_push (vec_oprnd);
fb85abff 1652 }
1653 }
1654}
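
/* A typical caller pattern (an illustrative sketch only; the variable
   names are hypothetical, but the shape matches the vectorizable_*
   routines in this file): the first copy gets its defs from
   vect_get_vec_defs, each later copy from vect_get_vec_defs_for_stmt_copy:

     for (j = 0; j < ncopies; j++)
       {
         if (j == 0)
           vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                              slp_node, -1);
         else
           vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
         ... build vectorized stmt J from vec_oprnds0[0] (and vec_oprnds1[0]) ...
       }  */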
1655
1656
1657/* Function vect_finish_stmt_generation.
1658
1659 Insert VEC_STMT, the vectorized form of STMT, before *GSI, keeping
 virtual SSA form up to date and recording its stmt_vec_info. */
1660
1661void
1662vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1663 gimple_stmt_iterator *gsi)
1664{
1665 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1666 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 1667 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
fb85abff 1668
1669 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1670
b324a248 1671 if (!gsi_end_p (*gsi)
1672 && gimple_has_mem_ops (vec_stmt))
1673 {
1674 gimple at_stmt = gsi_stmt (*gsi);
1675 tree vuse = gimple_vuse (at_stmt);
1676 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1677 {
1678 tree vdef = gimple_vdef (at_stmt);
1679 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1680 /* If we have an SSA vuse and insert a store, update virtual
1681 SSA form to avoid triggering the renamer. Do so only
1682 if we can easily see all uses -- which is what almost always
1683 happens with the way vectorized stmts are inserted. */
1684 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1685 && ((is_gimple_assign (vec_stmt)
1686 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1687 || (is_gimple_call (vec_stmt)
1688 && !(gimple_call_flags (vec_stmt)
1689 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1690 {
1691 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1692 gimple_set_vdef (vec_stmt, new_vdef);
1693 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1694 }
1695 }
1696 }
fb85abff 1697 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1698
48e1416a 1699 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
37545e54 1700 bb_vinfo));
fb85abff 1701
6d8fb6cf 1702 if (dump_enabled_p ())
fb85abff 1703 {
7bd765d4 1704 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1705 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
78bb46f5 1706 dump_printf (MSG_NOTE, "\n");
fb85abff 1707 }
1708
eeac4e92 1709 gimple_set_location (vec_stmt, gimple_location (stmt));
4e459157 1710
1711 /* While EH edges will generally prevent vectorization, stmt might
1712 e.g. be in a must-not-throw region. Ensure newly created stmts
1713 that could throw are part of the same region. */
1714 int lp_nr = lookup_stmt_eh_lp (stmt);
1715 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1716 add_stmt_to_eh_lp (vec_stmt, lp_nr);
fb85abff 1717}
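
/* Illustration of the virtual operand update above (the SSA names are
   hypothetical): if *GSI points at a store "*p_1 = x_2" with vuse .MEM_5
   and vdef .MEM_6, a vectorized store VEC_STMT inserted before it receives
   the vuse .MEM_5 and a fresh vdef .MEM_7, and the vuse of "*p_1 = x_2" is
   rewired to .MEM_7 -- so virtual SSA form stays valid without a renamer
   run. */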
1718
1719/* Checks if CALL can be vectorized in type VECTYPE. Returns
1720 a function declaration if the target has a vectorized version
1721 of the function, or NULL_TREE if the function cannot be vectorized. */
1722
1723tree
1724vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1725{
1726 tree fndecl = gimple_call_fndecl (call);
fb85abff 1727
1728 /* We only handle functions that do not read or clobber memory -- i.e.
1729 const or novops ones. */
1730 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1731 return NULL_TREE;
1732
1733 if (!fndecl
1734 || TREE_CODE (fndecl) != FUNCTION_DECL
1735 || !DECL_BUILT_IN (fndecl))
1736 return NULL_TREE;
1737
31ac7341 1738 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
fb85abff 1739 vectype_in);
1740}
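
/* For example (a sketch -- the mapping is entirely target-defined): when
   vectorizing "y = sqrtf (x)" with V4SF as both VECTYPE_OUT and VECTYPE_IN,
   a target's builtin_vectorized_function hook may return the decl of its
   V4SF square-root builtin, which vectorizable_call then emits in place of
   the scalar call; a target without such a builtin returns NULL_TREE and
   the call is not vectorized. */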
1741
c71d3c24 1742
1743static tree permute_vec_elements (tree, tree, tree, gimple,
1744 gimple_stmt_iterator *);
1745
1746
1747/* Function vectorizable_mask_load_store.
1748
1749 Check if STMT performs a conditional load or store that can be vectorized.
1750 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1751 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1752 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1753
1754static bool
1755vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1756 gimple *vec_stmt, slp_tree slp_node)
1757{
1758 tree vec_dest = NULL;
1759 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1760 stmt_vec_info prev_stmt_info;
1761 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1762 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1763 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1764 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1765 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1766 tree elem_type;
1767 gimple new_stmt;
1768 tree dummy;
1769 tree dataref_ptr = NULL_TREE;
1770 gimple ptr_incr;
1771 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1772 int ncopies;
1773 int i, j;
1774 bool inv_p;
1775 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1776 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1777 int gather_scale = 1;
1778 enum vect_def_type gather_dt = vect_unknown_def_type;
1779 bool is_store;
1780 tree mask;
1781 gimple def_stmt;
1782 tree def;
1783 enum vect_def_type dt;
1784
1785 if (slp_node != NULL)
1786 return false;
1787
1788 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1789 gcc_assert (ncopies >= 1);
1790
1791 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1792 mask = gimple_call_arg (stmt, 2);
1793 if (TYPE_PRECISION (TREE_TYPE (mask))
1794 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1795 return false;
1796
1797 /* FORNOW. This restriction should be relaxed. */
1798 if (nested_in_vect_loop && ncopies > 1)
1799 {
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1802 "multiple types in nested loop.");
1803 return false;
1804 }
1805
1806 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1807 return false;
1808
1809 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1810 return false;
1811
1812 if (!STMT_VINFO_DATA_REF (stmt_info))
1813 return false;
1814
1815 elem_type = TREE_TYPE (vectype);
1816
1817 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1818 return false;
1819
1820 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1821 return false;
1822
1823 if (STMT_VINFO_GATHER_P (stmt_info))
1824 {
1825 gimple def_stmt;
1826 tree def;
1827 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1828 &gather_off, &gather_scale);
1829 gcc_assert (gather_decl);
1830 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1831 &def_stmt, &def, &gather_dt,
1832 &gather_off_vectype))
1833 {
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "gather index use not simple.");
1837 return false;
1838 }
d7bcf3d1 1839
1840 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1841 tree masktype
1842 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1843 if (TREE_CODE (masktype) == INTEGER_TYPE)
1844 {
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "masked gather with integer mask not supported.");
1848 return false;
1849 }
c71d3c24 1850 }
1851 else if (tree_int_cst_compare (nested_in_vect_loop
1852 ? STMT_VINFO_DR_STEP (stmt_info)
1853 : DR_STEP (dr), size_zero_node) <= 0)
1854 return false;
1855 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1856 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1857 return false;
1858
1859 if (TREE_CODE (mask) != SSA_NAME)
1860 return false;
1861
1862 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1863 &def_stmt, &def, &dt))
1864 return false;
1865
1866 if (is_store)
1867 {
1868 tree rhs = gimple_call_arg (stmt, 3);
1869 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1870 &def_stmt, &def, &dt))
1871 return false;
1872 }
1873
1874 if (!vec_stmt) /* transformation not required. */
1875 {
1876 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1877 if (is_store)
1878 vect_model_store_cost (stmt_info, ncopies, false, dt,
1879 NULL, NULL, NULL);
1880 else
1881 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1882 return true;
1883 }
1884
1885 /** Transform. **/
1886
1887 if (STMT_VINFO_GATHER_P (stmt_info))
1888 {
1889 tree vec_oprnd0 = NULL_TREE, op;
1890 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1891 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
dbe41d8c 1892 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
c71d3c24 1893 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
dbe41d8c 1894 tree mask_perm_mask = NULL_TREE;
c71d3c24 1895 edge pe = loop_preheader_edge (loop);
1896 gimple_seq seq;
1897 basic_block new_bb;
1898 enum { NARROW, NONE, WIDEN } modifier;
1899 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1900
dbe41d8c 1901 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1902 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1903 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1904 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1905 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1906 scaletype = TREE_VALUE (arglist);
1907 gcc_checking_assert (types_compatible_p (srctype, rettype)
1908 && types_compatible_p (srctype, masktype));
1909
c71d3c24 1910 if (nunits == gather_off_nunits)
1911 modifier = NONE;
1912 else if (nunits == gather_off_nunits / 2)
1913 {
1914 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1915 modifier = WIDEN;
1916
1917 for (i = 0; i < gather_off_nunits; ++i)
1918 sel[i] = i | nunits;
1919
1920 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1921 gcc_assert (perm_mask != NULL_TREE);
1922 }
1923 else if (nunits == gather_off_nunits * 2)
1924 {
1925 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1926 modifier = NARROW;
1927
1928 for (i = 0; i < nunits; ++i)
1929 sel[i] = i < gather_off_nunits
1930 ? i : i + nunits - gather_off_nunits;
1931
1932 perm_mask = vect_gen_perm_mask (vectype, sel);
1933 gcc_assert (perm_mask != NULL_TREE);
1934 ncopies *= 2;
dbe41d8c 1935 for (i = 0; i < nunits; ++i)
1936 sel[i] = i | gather_off_nunits;
1937 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1938 gcc_assert (mask_perm_mask != NULL_TREE);
c71d3c24 1939 }
1940 else
1941 gcc_unreachable ();
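
      /* Worked example of the selectors above (numbers are illustrative):
	 with nunits == 4 and gather_off_nunits == 8 (WIDEN),
	 sel == {4,5,6,7,4,5,6,7}, so odd copies read the high half of the
	 offset vector; with nunits == 8 and gather_off_nunits == 4 (NARROW),
	 perm_mask selects {0,1,2,3,8,9,10,11} to concatenate the two
	 half-width gather results, and mask_perm_mask == {4,5,6,7,4,5,6,7}
	 feeds the high half of the mask vector to the second gather.  */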
1942
c71d3c24 1943 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1944
1945 ptr = fold_convert (ptrtype, gather_base);
1946 if (!is_gimple_min_invariant (ptr))
1947 {
1948 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1949 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1950 gcc_assert (!new_bb);
1951 }
1952
1953 scale = build_int_cst (scaletype, gather_scale);
1954
1955 prev_stmt_info = NULL;
1956 for (j = 0; j < ncopies; ++j)
1957 {
1958 if (modifier == WIDEN && (j & 1))
1959 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1960 perm_mask, stmt, gsi);
1961 else if (j == 0)
1962 op = vec_oprnd0
1963 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1964 else
1965 op = vec_oprnd0
1966 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1967
1968 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1969 {
1970 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1971 == TYPE_VECTOR_SUBPARTS (idxtype));
1972 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1973 var = make_ssa_name (var, NULL);
1974 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1975 new_stmt
1976 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1977 op, NULL_TREE);
1978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1979 op = var;
1980 }
1981
dbe41d8c 1982 if (mask_perm_mask && (j & 1))
1983 mask_op = permute_vec_elements (mask_op, mask_op,
1984 mask_perm_mask, stmt, gsi);
c71d3c24 1985 else
1986 {
dbe41d8c 1987 if (j == 0)
1988 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1989 else
1990 {
1991 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1992 &def_stmt, &def, &dt);
1993 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1994 }
c71d3c24 1995
dbe41d8c 1996 mask_op = vec_mask;
1997 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1998 {
1999 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2000 == TYPE_VECTOR_SUBPARTS (masktype));
2001 var = vect_get_new_vect_var (masktype, vect_simple_var,
2002 NULL);
2003 var = make_ssa_name (var, NULL);
2004 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2005 new_stmt
2006 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
2007 mask_op, NULL_TREE);
2008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2009 mask_op = var;
2010 }
c71d3c24 2011 }
2012
2013 new_stmt
2014 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2015 scale);
2016
2017 if (!useless_type_conversion_p (vectype, rettype))
2018 {
2019 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2020 == TYPE_VECTOR_SUBPARTS (rettype));
2021 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2022 op = make_ssa_name (var, new_stmt);
2023 gimple_call_set_lhs (new_stmt, op);
2024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2025 var = make_ssa_name (vec_dest, NULL);
2026 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2027 new_stmt
2028 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2029 NULL_TREE);
2030 }
2031 else
2032 {
2033 var = make_ssa_name (vec_dest, new_stmt);
2034 gimple_call_set_lhs (new_stmt, var);
2035 }
2036
2037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2038
2039 if (modifier == NARROW)
2040 {
2041 if ((j & 1) == 0)
2042 {
2043 prev_res = var;
2044 continue;
2045 }
2046 var = permute_vec_elements (prev_res, var,
2047 perm_mask, stmt, gsi);
2048 new_stmt = SSA_NAME_DEF_STMT (var);
2049 }
2050
2051 if (prev_stmt_info == NULL)
2052 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2053 else
2054 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2055 prev_stmt_info = vinfo_for_stmt (new_stmt);
2056 }
da7a2b38 2057
2058 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2059 from the IL. */
2060 tree lhs = gimple_call_lhs (stmt);
2061 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2062 set_vinfo_for_stmt (new_stmt, stmt_info);
2063 set_vinfo_for_stmt (stmt, NULL);
2064 STMT_VINFO_STMT (stmt_info) = new_stmt;
2065 gsi_replace (gsi, new_stmt, true);
c71d3c24 2066 return true;
2067 }
2068 else if (is_store)
2069 {
2070 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2071 prev_stmt_info = NULL;
2072 for (i = 0; i < ncopies; i++)
2073 {
2074 unsigned align, misalign;
2075
2076 if (i == 0)
2077 {
2078 tree rhs = gimple_call_arg (stmt, 3);
2079 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2080 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2081 /* We should have caught mismatched types earlier. */
2082 gcc_assert (useless_type_conversion_p (vectype,
2083 TREE_TYPE (vec_rhs)));
2084 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2085 NULL_TREE, &dummy, gsi,
2086 &ptr_incr, false, &inv_p);
2087 gcc_assert (!inv_p);
2088 }
2089 else
2090 {
2091 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2092 &def, &dt);
2093 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2094 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2095 &def, &dt);
2096 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2097 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2098 TYPE_SIZE_UNIT (vectype));
2099 }
2100
2101 align = TYPE_ALIGN_UNIT (vectype);
2102 if (aligned_access_p (dr))
2103 misalign = 0;
2104 else if (DR_MISALIGNMENT (dr) == -1)
2105 {
2106 align = TYPE_ALIGN_UNIT (elem_type);
2107 misalign = 0;
2108 }
2109 else
2110 misalign = DR_MISALIGNMENT (dr);
2111 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2112 misalign);
2113 new_stmt
2114 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2115 gimple_call_arg (stmt, 1),
2116 vec_mask, vec_rhs);
2117 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2118 if (i == 0)
2119 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2120 else
2121 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2122 prev_stmt_info = vinfo_for_stmt (new_stmt);
2123 }
2124 }
2125 else
2126 {
2127 tree vec_mask = NULL_TREE;
2128 prev_stmt_info = NULL;
2129 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2130 for (i = 0; i < ncopies; i++)
2131 {
2132 unsigned align, misalign;
2133
2134 if (i == 0)
2135 {
2136 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2137 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2138 NULL_TREE, &dummy, gsi,
2139 &ptr_incr, false, &inv_p);
2140 gcc_assert (!inv_p);
2141 }
2142 else
2143 {
2144 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2145 &def, &dt);
2146 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2147 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2148 TYPE_SIZE_UNIT (vectype));
2149 }
2150
2151 align = TYPE_ALIGN_UNIT (vectype);
2152 if (aligned_access_p (dr))
2153 misalign = 0;
2154 else if (DR_MISALIGNMENT (dr) == -1)
2155 {
2156 align = TYPE_ALIGN_UNIT (elem_type);
2157 misalign = 0;
2158 }
2159 else
2160 misalign = DR_MISALIGNMENT (dr);
2161 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2162 misalign);
2163 new_stmt
2164 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2165 gimple_call_arg (stmt, 1),
2166 vec_mask);
2167 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2168 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2169 if (i == 0)
2170 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2171 else
2172 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2173 prev_stmt_info = vinfo_for_stmt (new_stmt);
2174 }
2175 }
2176
da7a2b38 2177 if (!is_store)
2178 {
2179 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2180 from the IL. */
2181 tree lhs = gimple_call_lhs (stmt);
2182 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2183 set_vinfo_for_stmt (new_stmt, stmt_info);
2184 set_vinfo_for_stmt (stmt, NULL);
2185 STMT_VINFO_STMT (stmt_info) = new_stmt;
2186 gsi_replace (gsi, new_stmt, true);
2187 }
2188
c71d3c24 2189 return true;
2190}
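
/* Sketch of the IL produced for a masked store with ncopies == 2 (the
   names are hypothetical): the single scalar internal call

     MASK_STORE (p_1, align, mask_2, rhs_3);

   is replaced by two internal calls on vector operands, chained through
   STMT_VINFO_RELATED_STMT:

     MASK_STORE (vectp_4, align, vmask_5, vrhs_6);
     MASK_STORE (vectp_7, align, vmask_8, vrhs_9);  */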
2191
2192
fb85abff 2193/* Function vectorizable_call.
2194
48e1416a 2195 Check if STMT performs a function call that can be vectorized.
2196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
fb85abff 2197 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2199
2200static bool
b57a47d0 2201vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2202 slp_tree slp_node)
fb85abff 2203{
2204 tree vec_dest;
2205 tree scalar_dest;
2206 tree op, type;
2207 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2209 tree vectype_out, vectype_in;
2210 int nunits_in;
2211 int nunits_out;
2212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
b57a47d0 2213 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b334cbba 2214 tree fndecl, new_temp, def, rhs_type;
fb85abff 2215 gimple def_stmt;
596648a2 2216 enum vect_def_type dt[3]
2217 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
afbe62f9 2218 gimple new_stmt = NULL;
fb85abff 2219 int ncopies, j;
1e094109 2220 vec<tree> vargs = vNULL;
fb85abff 2221 enum { NARROW, NONE, WIDEN } modifier;
2222 size_t i, nargs;
cfdcf183 2223 tree lhs;
fb85abff 2224
b57a47d0 2225 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
fb85abff 2226 return false;
2227
f083cd24 2228 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
fb85abff 2229 return false;
2230
fb85abff 2231 /* Is STMT a vectorizable call? */
2232 if (!is_gimple_call (stmt))
2233 return false;
2234
c71d3c24 2235 if (gimple_call_internal_p (stmt)
2236 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2237 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2238 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2239 slp_node);
2240
d09768a4 2241 if (gimple_call_lhs (stmt) == NULL_TREE
2242 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
fb85abff 2243 return false;
2244
d09768a4 2245 gcc_checking_assert (!stmt_can_throw_internal (stmt));
42a6710e 2246
b334cbba 2247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2248
fb85abff 2249 /* Process function arguments. */
2250 rhs_type = NULL_TREE;
b334cbba 2251 vectype_in = NULL_TREE;
fb85abff 2252 nargs = gimple_call_num_args (stmt);
2253
7e0713b1 2254 /* Bail out if the function has more than three arguments; we do not have
2255 interesting builtin functions to vectorize with more than two arguments
2256 except for fma. Having no arguments is not useful either. */
2257 if (nargs == 0 || nargs > 3)
fb85abff 2258 return false;
2259
3d483a94 2260 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2261 if (gimple_call_internal_p (stmt)
2262 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2263 {
2264 nargs = 0;
2265 rhs_type = unsigned_type_node;
2266 }
2267
fb85abff 2268 for (i = 0; i < nargs; i++)
2269 {
b334cbba 2270 tree opvectype;
2271
fb85abff 2272 op = gimple_call_arg (stmt, i);
2273
2274 /* We can only handle calls with arguments of the same type. */
2275 if (rhs_type
8d5cb2fa 2276 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
fb85abff 2277 {
6d8fb6cf 2278 if (dump_enabled_p ())
7bd765d4 2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 2280 "argument types differ.\n");
fb85abff 2281 return false;
2282 }
b334cbba 2283 if (!rhs_type)
2284 rhs_type = TREE_TYPE (op);
fb85abff 2285
bed8b93b 2286 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
b334cbba 2287 &def_stmt, &def, &dt[i], &opvectype))
fb85abff 2288 {
6d8fb6cf 2289 if (dump_enabled_p ())
7bd765d4 2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 2291 "use not simple.\n");
fb85abff 2292 return false;
2293 }
fb85abff 2294
b334cbba 2295 if (!vectype_in)
2296 vectype_in = opvectype;
2297 else if (opvectype
2298 && opvectype != vectype_in)
2299 {
6d8fb6cf 2300 if (dump_enabled_p ())
7bd765d4 2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 2302 "argument vector types differ.\n");
b334cbba 2303 return false;
2304 }
2305 }
2306 /* If all arguments are external or constant defs, use a vector type with
2307 the same size as the output vector type. */
fb85abff 2308 if (!vectype_in)
b334cbba 2309 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
29b41aee 2310 if (vec_stmt)
2311 gcc_assert (vectype_in);
2312 if (!vectype_in)
2313 {
6d8fb6cf 2314 if (dump_enabled_p ())
29b41aee 2315 {
7bd765d4 2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "no vectype for scalar type ");
2318 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
78bb46f5 2319 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
29b41aee 2320 }
2321
2322 return false;
2323 }
fb85abff 2324
2325 /* FORNOW */
b334cbba 2326 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
fb85abff 2328 if (nunits_in == nunits_out / 2)
2329 modifier = NARROW;
2330 else if (nunits_out == nunits_in)
2331 modifier = NONE;
2332 else if (nunits_out == nunits_in / 2)
2333 modifier = WIDEN;
2334 else
2335 return false;
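
  /* For instance (illustrative types): a call taking V2DF arguments and
     producing a V4SI result has nunits_in == 2 and nunits_out == 4, i.e.
     NARROW -- two vector arguments must be fed to each vectorized call.
     The mirror-image V4SI -> V2DF case would be WIDEN, which no target
     currently implements (see the switch below).  */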
2336
2337 /* For now, we only vectorize functions if a target specific builtin
2338 is available. TODO -- in some cases, it might be profitable to
2339 insert the calls for pieces of the vector, in order to be able
2340 to vectorize other operations in the loop. */
2341 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2342 if (fndecl == NULL_TREE)
2343 {
3d483a94 2344 if (gimple_call_internal_p (stmt)
2345 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2346 && !slp_node
2347 && loop_vinfo
2348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2349 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2350 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2351 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2352 {
2353 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2354 { 0, 1, 2, ... vf - 1 } vector. */
2355 gcc_assert (nargs == 0);
2356 }
2357 else
2358 {
2359 if (dump_enabled_p ())
2360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 2361 "function is not vectorizable.\n");
3d483a94 2362 return false;
2363 }
fb85abff 2364 }
2365
dd277d48 2366 gcc_assert (!gimple_vuse (stmt));
fb85abff 2367
b57a47d0 2368 if (slp_node || PURE_SLP_STMT (stmt_info))
2369 ncopies = 1;
2370 else if (modifier == NARROW)
fb85abff 2371 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2372 else
2373 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2374
2375 /* Sanity check: make sure that at least one copy of the vectorized stmt
2376 needs to be generated. */
2377 gcc_assert (ncopies >= 1);
2378
2379 if (!vec_stmt) /* transformation not required. */
2380 {
2381 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
6d8fb6cf 2382 if (dump_enabled_p ())
78bb46f5 2383 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2384 "\n");
4db2b577 2385 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
fb85abff 2386 return true;
2387 }
2388
2389 /** Transform. **/
2390
6d8fb6cf 2391 if (dump_enabled_p ())
78bb46f5 2392 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
fb85abff 2393
2394 /* Handle def. */
2395 scalar_dest = gimple_call_lhs (stmt);
2396 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2397
2398 prev_stmt_info = NULL;
2399 switch (modifier)
2400 {
2401 case NONE:
2402 for (j = 0; j < ncopies; ++j)
2403 {
2404 /* Build argument list for the vectorized call. */
2405 if (j == 0)
f1f41a6c 2406 vargs.create (nargs);
fb85abff 2407 else
f1f41a6c 2408 vargs.truncate (0);
fb85abff 2409
b57a47d0 2410 if (slp_node)
2411 {
c2078b80 2412 auto_vec<vec<tree> > vec_defs (nargs);
f1f41a6c 2413 vec<tree> vec_oprnds0;
b57a47d0 2414
2415 for (i = 0; i < nargs; i++)
f1f41a6c 2416 vargs.quick_push (gimple_call_arg (stmt, i));
b57a47d0 2417 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
7f7695a7 2418 vec_oprnds0 = vec_defs[0];
b57a47d0 2419
2420 /* Arguments are ready. Create the new vector stmt. */
f1f41a6c 2421 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
b57a47d0 2422 {
2423 size_t k;
2424 for (k = 0; k < nargs; k++)
2425 {
7f7695a7 2426 vec<tree> vec_oprndsk = vec_defs[k];
f1f41a6c 2427 vargs[k] = vec_oprndsk[i];
b57a47d0 2428 }
2429 new_stmt = gimple_build_call_vec (fndecl, vargs);
2430 new_temp = make_ssa_name (vec_dest, new_stmt);
2431 gimple_call_set_lhs (new_stmt, new_temp);
2432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
f1f41a6c 2433 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b57a47d0 2434 }
2435
2436 for (i = 0; i < nargs; i++)
2437 {
7f7695a7 2438 vec<tree> vec_oprndsi = vec_defs[i];
f1f41a6c 2439 vec_oprndsi.release ();
b57a47d0 2440 }
b57a47d0 2441 continue;
2442 }
2443
fb85abff 2444 for (i = 0; i < nargs; i++)
2445 {
2446 op = gimple_call_arg (stmt, i);
2447 if (j == 0)
2448 vec_oprnd0
2449 = vect_get_vec_def_for_operand (op, stmt, NULL);
2450 else
afbe62f9 2451 {
2452 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2453 vec_oprnd0
2454 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2455 }
fb85abff 2456
f1f41a6c 2457 vargs.quick_push (vec_oprnd0);
fb85abff 2458 }
2459
3d483a94 2460 if (gimple_call_internal_p (stmt)
2461 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2462 {
2463 tree *v = XALLOCAVEC (tree, nunits_out);
2464 int k;
2465 for (k = 0; k < nunits_out; ++k)
2466 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2467 tree cst = build_vector (vectype_out, v);
2468 tree new_var
2469 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2470 gimple init_stmt = gimple_build_assign (new_var, cst);
2471 new_temp = make_ssa_name (new_var, init_stmt);
2472 gimple_assign_set_lhs (init_stmt, new_temp);
2473 vect_init_vector_1 (stmt, init_stmt, NULL);
2474 new_temp = make_ssa_name (vec_dest, NULL);
2475 new_stmt = gimple_build_assign (new_temp,
2476 gimple_assign_lhs (init_stmt));
2477 }
2478 else
2479 {
2480 new_stmt = gimple_build_call_vec (fndecl, vargs);
2481 new_temp = make_ssa_name (vec_dest, new_stmt);
2482 gimple_call_set_lhs (new_stmt, new_temp);
2483 }
fb85abff 2484 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2485
2486 if (j == 0)
2487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2488 else
2489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2490
2491 prev_stmt_info = vinfo_for_stmt (new_stmt);
2492 }
2493
2494 break;
2495
2496 case NARROW:
2497 for (j = 0; j < ncopies; ++j)
2498 {
2499 /* Build argument list for the vectorized call. */
2500 if (j == 0)
f1f41a6c 2501 vargs.create (nargs * 2);
fb85abff 2502 else
f1f41a6c 2503 vargs.truncate (0);
fb85abff 2504
b57a47d0 2505 if (slp_node)
2506 {
c2078b80 2507 auto_vec<vec<tree> > vec_defs (nargs);
f1f41a6c 2508 vec<tree> vec_oprnds0;
b57a47d0 2509
2510 for (i = 0; i < nargs; i++)
f1f41a6c 2511 vargs.quick_push (gimple_call_arg (stmt, i));
b57a47d0 2512 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
7f7695a7 2513 vec_oprnds0 = vec_defs[0];
b57a47d0 2514
2515 /* Arguments are ready. Create the new vector stmt. */
f1f41a6c 2516 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
b57a47d0 2517 {
2518 size_t k;
f1f41a6c 2519 vargs.truncate (0);
b57a47d0 2520 for (k = 0; k < nargs; k++)
2521 {
7f7695a7 2522 vec<tree> vec_oprndsk = vec_defs[k];
f1f41a6c 2523 vargs.quick_push (vec_oprndsk[i]);
2524 vargs.quick_push (vec_oprndsk[i + 1]);
b57a47d0 2525 }
2526 new_stmt = gimple_build_call_vec (fndecl, vargs);
2527 new_temp = make_ssa_name (vec_dest, new_stmt);
2528 gimple_call_set_lhs (new_stmt, new_temp);
2529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
f1f41a6c 2530 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
b57a47d0 2531 }
2532
2533 for (i = 0; i < nargs; i++)
2534 {
7f7695a7 2535 vec<tree> vec_oprndsi = vec_defs[i];
f1f41a6c 2536 vec_oprndsi.release ();
b57a47d0 2537 }
b57a47d0 2538 continue;
2539 }
2540
fb85abff 2541 for (i = 0; i < nargs; i++)
2542 {
2543 op = gimple_call_arg (stmt, i);
2544 if (j == 0)
2545 {
2546 vec_oprnd0
2547 = vect_get_vec_def_for_operand (op, stmt, NULL);
2548 vec_oprnd1
afbe62f9 2549 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
fb85abff 2550 }
2551 else
2552 {
73a82b22 2553 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
fb85abff 2554 vec_oprnd0
afbe62f9 2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
fb85abff 2556 vec_oprnd1
afbe62f9 2557 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
fb85abff 2558 }
2559
f1f41a6c 2560 vargs.quick_push (vec_oprnd0);
2561 vargs.quick_push (vec_oprnd1);
fb85abff 2562 }
2563
2564 new_stmt = gimple_build_call_vec (fndecl, vargs);
2565 new_temp = make_ssa_name (vec_dest, new_stmt);
2566 gimple_call_set_lhs (new_stmt, new_temp);
fb85abff 2567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2568
2569 if (j == 0)
2570 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2571 else
2572 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2573
2574 prev_stmt_info = vinfo_for_stmt (new_stmt);
2575 }
2576
2577 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2578
2579 break;
2580
2581 case WIDEN:
2582 /* No current target implements this case. */
2583 return false;
2584 }
2585
f1f41a6c 2586 vargs.release ();
fb85abff 2587
fb85abff 2588 /* The call in STMT might prevent it from being removed in DCE.
2589 We cannot remove it here, however, because of the way the SSA name
2590 it defines is mapped to the new definition. So just replace the
2591 rhs of the statement with something harmless. */
2592
529fcc2a 2593 if (slp_node)
2594 return true;
2595
fb85abff 2596 type = TREE_TYPE (scalar_dest);
cfdcf183 2597 if (is_pattern_stmt_p (stmt_info))
2598 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2599 else
2600 lhs = gimple_call_lhs (stmt);
2601 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
fb85abff 2602 set_vinfo_for_stmt (new_stmt, stmt_info);
529fcc2a 2603 set_vinfo_for_stmt (stmt, NULL);
fb85abff 2604 STMT_VINFO_STMT (stmt_info) = new_stmt;
2605 gsi_replace (gsi, new_stmt, false);
fb85abff 2606
2607 return true;
2608}
2609
2610
d09768a4 2611struct simd_call_arg_info
2612{
2613 tree vectype;
2614 tree op;
2615 enum vect_def_type dt;
2616 HOST_WIDE_INT linear_step;
2617 unsigned int align;
2618};
2619
2620/* Function vectorizable_simd_clone_call.
2621
2622 Check if STMT performs a function call that can be vectorized
2623 by calling a simd clone of the function.
2624 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2625 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2626 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2627
2628static bool
2629vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2630 gimple *vec_stmt, slp_tree slp_node)
2631{
2632 tree vec_dest;
2633 tree scalar_dest;
2634 tree op, type;
2635 tree vec_oprnd0 = NULL_TREE;
2636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2637 tree vectype;
2638 unsigned int nunits;
2639 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2640 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2641 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2642 tree fndecl, new_temp, def;
2643 gimple def_stmt;
2644 gimple new_stmt = NULL;
2645 int ncopies, j;
2646 vec<simd_call_arg_info> arginfo = vNULL;
2647 vec<tree> vargs = vNULL;
2648 size_t i, nargs;
2649 tree lhs, rtype, ratype;
2650 vec<constructor_elt, va_gc> *ret_ctor_elts;
2651
2652 /* Is STMT a vectorizable call? */
2653 if (!is_gimple_call (stmt))
2654 return false;
2655
2656 fndecl = gimple_call_fndecl (stmt);
2657 if (fndecl == NULL_TREE)
2658 return false;
2659
415d1b9a 2660 struct cgraph_node *node = cgraph_node::get (fndecl);
d09768a4 2661 if (node == NULL || node->simd_clones == NULL)
2662 return false;
2663
2664 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2665 return false;
2666
2667 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2668 return false;
2669
2670 if (gimple_call_lhs (stmt)
2671 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2672 return false;
2673
2674 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2675
2676 vectype = STMT_VINFO_VECTYPE (stmt_info);
2677
2678 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2679 return false;
2680
2681 /* FORNOW */
2682 if (slp_node || PURE_SLP_STMT (stmt_info))
2683 return false;
2684
2685 /* Process function arguments. */
2686 nargs = gimple_call_num_args (stmt);
2687
2688 /* Bail out if the function has zero arguments. */
2689 if (nargs == 0)
2690 return false;
2691
2692 arginfo.create (nargs);
2693
2694 for (i = 0; i < nargs; i++)
2695 {
2696 simd_call_arg_info thisarginfo;
2697 affine_iv iv;
2698
2699 thisarginfo.linear_step = 0;
2700 thisarginfo.align = 0;
2701 thisarginfo.op = NULL_TREE;
2702
2703 op = gimple_call_arg (stmt, i);
2704 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2705 &def_stmt, &def, &thisarginfo.dt,
2706 &thisarginfo.vectype)
2707 || thisarginfo.dt == vect_uninitialized_def)
2708 {
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2711 "use not simple.\n");
2712 arginfo.release ();
2713 return false;
2714 }
2715
2716 if (thisarginfo.dt == vect_constant_def
2717 || thisarginfo.dt == vect_external_def)
2718 gcc_assert (thisarginfo.vectype == NULL_TREE);
2719 else
2720 gcc_assert (thisarginfo.vectype != NULL_TREE);
2721
2722 if (thisarginfo.dt != vect_constant_def
2723 && thisarginfo.dt != vect_external_def
2724 && loop_vinfo
2725 && TREE_CODE (op) == SSA_NAME
2726 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2727 && tree_fits_shwi_p (iv.step))
2728 {
2729 thisarginfo.linear_step = tree_to_shwi (iv.step);
2730 thisarginfo.op = iv.base;
2731 }
2732 else if ((thisarginfo.dt == vect_constant_def
2733 || thisarginfo.dt == vect_external_def)
2734 && POINTER_TYPE_P (TREE_TYPE (op)))
2735 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2736
2737 arginfo.quick_push (thisarginfo);
2738 }
2739
2740 unsigned int badness = 0;
2741 struct cgraph_node *bestn = NULL;
2742 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
415d1b9a 2743 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
d09768a4 2744 else
2745 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2746 n = n->simdclone->next_clone)
2747 {
2748 unsigned int this_badness = 0;
2749 if (n->simdclone->simdlen
2750 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2751 || n->simdclone->nargs != nargs)
2752 continue;
2753 if (n->simdclone->simdlen
2754 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2755 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2756 - exact_log2 (n->simdclone->simdlen)) * 1024;
2757 if (n->simdclone->inbranch)
2758 this_badness += 2048;
2759 int target_badness = targetm.simd_clone.usable (n);
2760 if (target_badness < 0)
2761 continue;
2762 this_badness += target_badness * 512;
2763 /* FORNOW: Have to add code to add the mask argument. */
2764 if (n->simdclone->inbranch)
2765 continue;
2766 for (i = 0; i < nargs; i++)
2767 {
2768 switch (n->simdclone->args[i].arg_type)
2769 {
2770 case SIMD_CLONE_ARG_TYPE_VECTOR:
2771 if (!useless_type_conversion_p
2772 (n->simdclone->args[i].orig_type,
2773 TREE_TYPE (gimple_call_arg (stmt, i))))
2774 i = -1;
2775 else if (arginfo[i].dt == vect_constant_def
2776 || arginfo[i].dt == vect_external_def
2777 || arginfo[i].linear_step)
2778 this_badness += 64;
2779 break;
2780 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2781 if (arginfo[i].dt != vect_constant_def
2782 && arginfo[i].dt != vect_external_def)
2783 i = -1;
2784 break;
2785 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2786 if (arginfo[i].dt == vect_constant_def
2787 || arginfo[i].dt == vect_external_def
2788 || (arginfo[i].linear_step
2789 != n->simdclone->args[i].linear_step))
2790 i = -1;
2791 break;
2792 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2793 /* FORNOW */
2794 i = -1;
2795 break;
2796 case SIMD_CLONE_ARG_TYPE_MASK:
2797 gcc_unreachable ();
2798 }
2799 if (i == (size_t) -1)
2800 break;
2801 if (n->simdclone->args[i].alignment > arginfo[i].align)
2802 {
2803 i = -1;
2804 break;
2805 }
2806 if (arginfo[i].align)
2807 this_badness += (exact_log2 (arginfo[i].align)
2808 - exact_log2 (n->simdclone->args[i].alignment));
2809 }
2810 if (i == (size_t) -1)
2811 continue;
2812 if (bestn == NULL || this_badness < badness)
2813 {
2814 bestn = n;
2815 badness = this_badness;
2816 }
2817 }
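
  /* Example of the scoring above (numbers are illustrative): with a
     vectorization factor of 8, a usable clone with simdlen == 4 starts at
     (log2 (8) - log2 (4)) * 1024 == 1024; an inbranch clone would add 2048
     (and is in fact skipped for now); targetm.simd_clone.usable contributes
     512 per badness unit; and each vector argument whose def is constant,
     external or linear adds 64.  The usable clone with the smallest total
     wins.  */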
2818
2819 if (bestn == NULL)
2820 {
2821 arginfo.release ();
2822 return false;
2823 }
2824
2825 for (i = 0; i < nargs; i++)
2826 if ((arginfo[i].dt == vect_constant_def
2827 || arginfo[i].dt == vect_external_def)
2828 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2829 {
2830 arginfo[i].vectype
2831 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2832 i)));
2833 if (arginfo[i].vectype == NULL
2834 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2835 > bestn->simdclone->simdlen))
2836 {
2837 arginfo.release ();
2838 return false;
2839 }
2840 }
2841
2842 fndecl = bestn->decl;
2843 nunits = bestn->simdclone->simdlen;
2844 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2845
2846 /* If the function isn't const, only allow it in simd loops where the user
2847 has asserted that at least nunits consecutive iterations can be
2848 performed using SIMD instructions. */
2849 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2850 && gimple_vuse (stmt))
2851 {
2852 arginfo.release ();
2853 return false;
2854 }
2855
2856 /* Sanity check: make sure that at least one copy of the vectorized stmt
2857 needs to be generated. */
2858 gcc_assert (ncopies >= 1);
2859
2860 if (!vec_stmt) /* transformation not required. */
2861 {
2862 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2863 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2864 if (dump_enabled_p ())
2865 dump_printf_loc (MSG_NOTE, vect_location,
2866 "=== vectorizable_simd_clone_call ===\n");
2867/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2868 arginfo.release ();
2869 return true;
2870 }
2871
2872 /** Transform. **/
2873
2874 if (dump_enabled_p ())
2875 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2876
2877 /* Handle def. */
2878 scalar_dest = gimple_call_lhs (stmt);
2879 vec_dest = NULL_TREE;
2880 rtype = NULL_TREE;
2881 ratype = NULL_TREE;
2882 if (scalar_dest)
2883 {
2884 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2885 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2886 if (TREE_CODE (rtype) == ARRAY_TYPE)
2887 {
2888 ratype = rtype;
2889 rtype = TREE_TYPE (ratype);
2890 }
2891 }
2892
2893 prev_stmt_info = NULL;
2894 for (j = 0; j < ncopies; ++j)
2895 {
2896 /* Build argument list for the vectorized call. */
2897 if (j == 0)
2898 vargs.create (nargs);
2899 else
2900 vargs.truncate (0);
2901
2902 for (i = 0; i < nargs; i++)
2903 {
2904 unsigned int k, l, m, o;
2905 tree atype;
2906 op = gimple_call_arg (stmt, i);
2907 switch (bestn->simdclone->args[i].arg_type)
2908 {
2909 case SIMD_CLONE_ARG_TYPE_VECTOR:
2910 atype = bestn->simdclone->args[i].vector_type;
2911 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2912 for (m = j * o; m < (j + 1) * o; m++)
2913 {
2914 if (TYPE_VECTOR_SUBPARTS (atype)
2915 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2916 {
2917 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2918 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2919 / TYPE_VECTOR_SUBPARTS (atype));
2920 gcc_assert ((k & (k - 1)) == 0);
2921 if (m == 0)
2922 vec_oprnd0
2923 = vect_get_vec_def_for_operand (op, stmt, NULL);
2924 else
2925 {
2926 vec_oprnd0 = arginfo[i].op;
2927 if ((m & (k - 1)) == 0)
2928 vec_oprnd0
2929 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2930 vec_oprnd0);
2931 }
2932 arginfo[i].op = vec_oprnd0;
2933 vec_oprnd0
2934 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2935 size_int (prec),
2936 bitsize_int ((m & (k - 1)) * prec));
2937 new_stmt
2938 = gimple_build_assign (make_ssa_name (atype, NULL),
2939 vec_oprnd0);
2940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2941 vargs.safe_push (gimple_assign_lhs (new_stmt));
2942 }
2943 else
2944 {
2945 k = (TYPE_VECTOR_SUBPARTS (atype)
2946 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2947 gcc_assert ((k & (k - 1)) == 0);
2948 vec<constructor_elt, va_gc> *ctor_elts;
2949 if (k != 1)
2950 vec_alloc (ctor_elts, k);
2951 else
2952 ctor_elts = NULL;
2953 for (l = 0; l < k; l++)
2954 {
2955 if (m == 0 && l == 0)
2956 vec_oprnd0
2957 = vect_get_vec_def_for_operand (op, stmt, NULL);
2958 else
2959 vec_oprnd0
2960 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2961 arginfo[i].op);
2962 arginfo[i].op = vec_oprnd0;
2963 if (k == 1)
2964 break;
2965 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2966 vec_oprnd0);
2967 }
2968 if (k == 1)
2969 vargs.safe_push (vec_oprnd0);
2970 else
2971 {
2972 vec_oprnd0 = build_constructor (atype, ctor_elts);
2973 new_stmt
2974 = gimple_build_assign (make_ssa_name (atype, NULL),
2975 vec_oprnd0);
2976 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2977 vargs.safe_push (gimple_assign_lhs (new_stmt));
2978 }
2979 }
2980 }
2981 break;
2982 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2983 vargs.safe_push (op);
2984 break;
2985 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2986 if (j == 0)
2987 {
2988 gimple_seq stmts;
2989 arginfo[i].op
2990 = force_gimple_operand (arginfo[i].op, &stmts, true,
2991 NULL_TREE);
2992 if (stmts != NULL)
2993 {
2994 basic_block new_bb;
2995 edge pe = loop_preheader_edge (loop);
2996 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2997 gcc_assert (!new_bb);
2998 }
2999 tree phi_res = copy_ssa_name (op, NULL);
3000 gimple new_phi = create_phi_node (phi_res, loop->header);
3001 set_vinfo_for_stmt (new_phi,
3002 new_stmt_vec_info (new_phi, loop_vinfo,
3003 NULL));
3004 add_phi_arg (new_phi, arginfo[i].op,
3005 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3006 enum tree_code code
3007 = POINTER_TYPE_P (TREE_TYPE (op))
3008 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3009 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3010 ? sizetype : TREE_TYPE (op);
bdff91a1 3011 widest_int cst
3012 = wi::mul (bestn->simdclone->args[i].linear_step,
3013 ncopies * nunits);
3014 tree tcst = wide_int_to_tree (type, cst);
d09768a4 3015 tree phi_arg = copy_ssa_name (op, NULL);
3016 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3017 phi_res, tcst);
3018 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3019 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3020 set_vinfo_for_stmt (new_stmt,
3021 new_stmt_vec_info (new_stmt, loop_vinfo,
3022 NULL));
3023 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3024 UNKNOWN_LOCATION);
3025 arginfo[i].op = phi_res;
3026 vargs.safe_push (phi_res);
3027 }
3028 else
3029 {
3030 enum tree_code code
3031 = POINTER_TYPE_P (TREE_TYPE (op))
3032 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3033 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3034 ? sizetype : TREE_TYPE (op);
bdff91a1 3035 widest_int cst
3036 = wi::mul (bestn->simdclone->args[i].linear_step,
3037 j * nunits);
3038 tree tcst = wide_int_to_tree (type, cst);
d09768a4 3039 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3040 new_stmt
3041 = gimple_build_assign_with_ops (code, new_temp,
3042 arginfo[i].op, tcst);
3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3044 vargs.safe_push (new_temp);
3045 }
3046 break;
3047 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3048 default:
3049 gcc_unreachable ();
3050 }
3051 }
3052
3053 new_stmt = gimple_build_call_vec (fndecl, vargs);
3054 if (vec_dest)
3055 {
3056 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3057 if (ratype)
3058 new_temp = create_tmp_var (ratype, NULL);
3059 else if (TYPE_VECTOR_SUBPARTS (vectype)
3060 == TYPE_VECTOR_SUBPARTS (rtype))
3061 new_temp = make_ssa_name (vec_dest, new_stmt);
3062 else
3063 new_temp = make_ssa_name (rtype, new_stmt);
3064 gimple_call_set_lhs (new_stmt, new_temp);
3065 }
3066 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3067
3068 if (vec_dest)
3069 {
3070 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3071 {
3072 unsigned int k, l;
3073 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3074 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3075 gcc_assert ((k & (k - 1)) == 0);
3076 for (l = 0; l < k; l++)
3077 {
3078 tree t;
3079 if (ratype)
3080 {
3081 t = build_fold_addr_expr (new_temp);
3082 t = build2 (MEM_REF, vectype, t,
3083 build_int_cst (TREE_TYPE (t),
3084 l * prec / BITS_PER_UNIT));
3085 }
3086 else
3087 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3088 size_int (prec), bitsize_int (l * prec));
3089 new_stmt
3090 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3092 if (j == 0 && l == 0)
3093 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3094 else
3095 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3096
3097 prev_stmt_info = vinfo_for_stmt (new_stmt);
3098 }
3099
3100 if (ratype)
3101 {
3102 tree clobber = build_constructor (ratype, NULL);
3103 TREE_THIS_VOLATILE (clobber) = 1;
3104 new_stmt = gimple_build_assign (new_temp, clobber);
3105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3106 }
3107 continue;
3108 }
3109 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3110 {
3111 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3112 / TYPE_VECTOR_SUBPARTS (rtype));
3113 gcc_assert ((k & (k - 1)) == 0);
3114 if ((j & (k - 1)) == 0)
3115 vec_alloc (ret_ctor_elts, k);
3116 if (ratype)
3117 {
3118 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3119 for (m = 0; m < o; m++)
3120 {
3121 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3122 size_int (m), NULL_TREE, NULL_TREE);
3123 new_stmt
3124 = gimple_build_assign (make_ssa_name (rtype, NULL),
3125 tem);
3126 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3127 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3128 gimple_assign_lhs (new_stmt));
3129 }
3130 tree clobber = build_constructor (ratype, NULL);
3131 TREE_THIS_VOLATILE (clobber) = 1;
3132 new_stmt = gimple_build_assign (new_temp, clobber);
3133 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3134 }
3135 else
3136 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3137 if ((j & (k - 1)) != k - 1)
3138 continue;
3139 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3140 new_stmt
3141 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3142 vec_oprnd0);
3143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3144
3145 if ((unsigned) j == k - 1)
3146 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3147 else
3148 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3149
3150 prev_stmt_info = vinfo_for_stmt (new_stmt);
3151 continue;
3152 }
3153 else if (ratype)
3154 {
3155 tree t = build_fold_addr_expr (new_temp);
3156 t = build2 (MEM_REF, vectype, t,
3157 build_int_cst (TREE_TYPE (t), 0));
3158 new_stmt
3159 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3160 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3161 tree clobber = build_constructor (ratype, NULL);
3162 TREE_THIS_VOLATILE (clobber) = 1;
3163 vect_finish_stmt_generation (stmt,
3164 gimple_build_assign (new_temp,
3165 clobber), gsi);
3166 }
3167 }
3168
3169 if (j == 0)
3170 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3171 else
3172 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3173
3174 prev_stmt_info = vinfo_for_stmt (new_stmt);
3175 }
3176
3177 vargs.release ();
3178
3179 /* The call in STMT might prevent it from being removed in DCE.
3180 We cannot remove it here, however, because of the way the SSA name
3181 it defines is mapped to the new definition. So just replace the
3182 rhs of the statement with something harmless. */
3183
3184 if (slp_node)
3185 return true;
3186
3187 if (scalar_dest)
3188 {
3189 type = TREE_TYPE (scalar_dest);
3190 if (is_pattern_stmt_p (stmt_info))
3191 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3192 else
3193 lhs = gimple_call_lhs (stmt);
3194 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3195 }
3196 else
3197 new_stmt = gimple_build_nop ();
3198 set_vinfo_for_stmt (new_stmt, stmt_info);
3199 set_vinfo_for_stmt (stmt, NULL);
3200 STMT_VINFO_STMT (stmt_info) = new_stmt;
3201 gsi_replace (gsi, new_stmt, false);
3202 unlink_stmt_vdef (stmt);
3203
3204 return true;
3205}
3206
3207
fb85abff 3208/* Function vect_gen_widened_results_half
3209
3210 Create a vector stmt whose code is CODE, whose number of arguments is
48e1416a 3211 given by OP_TYPE, and whose result variable is VEC_DEST; its arguments
282bf14c 3212 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is inserted at GSI.
fb85abff 3213 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3214 needs to be created (DECL is a function-decl of a target-builtin).
3215 STMT is the original scalar stmt that we are vectorizing. */
3216
3217static gimple
3218vect_gen_widened_results_half (enum tree_code code,
3219 tree decl,
3220 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3221 tree vec_dest, gimple_stmt_iterator *gsi,
3222 gimple stmt)
48e1416a 3223{
fb85abff 3224 gimple new_stmt;
48e1416a 3225 tree new_temp;
3226
3227 /* Generate half of the widened result: */
3228 if (code == CALL_EXPR)
3229 {
3230 /* Target specific support */
fb85abff 3231 if (op_type == binary_op)
3232 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3233 else
3234 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3235 new_temp = make_ssa_name (vec_dest, new_stmt);
3236 gimple_call_set_lhs (new_stmt, new_temp);
48e1416a 3237 }
3238 else
fb85abff 3239 {
48e1416a 3240 /* Generic support */
3241 gcc_assert (op_type == TREE_CODE_LENGTH (code));
fb85abff 3242 if (op_type != binary_op)
3243 vec_oprnd1 = NULL;
3244 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3245 vec_oprnd1);
3246 new_temp = make_ssa_name (vec_dest, new_stmt);
3247 gimple_assign_set_lhs (new_stmt, new_temp);
48e1416a 3248 }
fb85abff 3249 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3250
fb85abff 3251 return new_stmt;
3252}
3253
7d96a007 3254
3255/* Get vectorized definitions for loop-based vectorization. For the first
3256 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3257 the scalar operand), and for the rest we get a copy with
3258 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3259 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3260 The vectors are collected into VEC_OPRNDS. */
3261
3262static void
3263vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
f1f41a6c 3264 vec<tree> *vec_oprnds, int multi_step_cvt)
7d96a007 3265{
3266 tree vec_oprnd;
3267
3268 /* Get first vector operand. */
3269 /* All the vector operands except the very first one (that is, the scalar
3270 operand) are stmt copies. */
3271 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3272 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3273 else
3274 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3275
f1f41a6c 3276 vec_oprnds->quick_push (vec_oprnd);
7d96a007 3277
3278 /* Get second vector operand. */
3279 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
f1f41a6c 3280 vec_oprnds->quick_push (vec_oprnd);
7d96a007 3281
3282 *oprnd = vec_oprnd;
3283
3284 /* For conversion in multiple steps, continue to get operands
3285 recursively. */
3286 if (multi_step_cvt)
3287 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3288}
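
/* E.g. (illustrative): a call with MULTI_STEP_CVT == 1 collects four
   defs into VEC_OPRNDS -- two at the outermost level plus two from the
   single recursive call -- which is exactly what a two-step narrowing
   conversion consumes. */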
3289
3290
3291/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3292 For multi-step conversions store the resulting vectors and call the function
3293 recursively. */
3294
3295static void
f1f41a6c 3296vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
7d96a007 3297 int multi_step_cvt, gimple stmt,
f1f41a6c 3298 vec<tree> vec_dsts,
7d96a007 3299 gimple_stmt_iterator *gsi,
3300 slp_tree slp_node, enum tree_code code,
3301 stmt_vec_info *prev_stmt_info)
3302{
3303 unsigned int i;
3304 tree vop0, vop1, new_tmp, vec_dest;
3305 gimple new_stmt;
3306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3307
f1f41a6c 3308 vec_dest = vec_dsts.pop ();
7d96a007 3309
f1f41a6c 3310 for (i = 0; i < vec_oprnds->length (); i += 2)
7d96a007 3311 {
3312 /* Create demotion operation. */
f1f41a6c 3313 vop0 = (*vec_oprnds)[i];
3314 vop1 = (*vec_oprnds)[i + 1];
7d96a007 3315 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3316 new_tmp = make_ssa_name (vec_dest, new_stmt);
3317 gimple_assign_set_lhs (new_stmt, new_tmp);
3318 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3319
3320 if (multi_step_cvt)
3321 /* Store the resulting vector for next recursive call. */
f1f41a6c 3322 (*vec_oprnds)[i/2] = new_tmp;
7d96a007 3323 else
3324 {
3325 /* This is the last step of the conversion sequence. Store the
3326 vectors in SLP_NODE or in vector info of the scalar statement
3327 (or in STMT_VINFO_RELATED_STMT chain). */
3328 if (slp_node)
f1f41a6c 3329 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7d96a007 3330 else
3331 {
3332 if (!*prev_stmt_info)
3333 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3334 else
3335 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3336
3337 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3338 }
3339 }
3340 }
3341
3342 /* For multi-step demotion operations we first generate demotion operations
3343 from the source type to the intermediate types, and then combine the
3344 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3345 type. */
3346 if (multi_step_cvt)
3347 {
3348 /* At each level of recursion we have half of the operands we had at the
3349 previous level. */
f1f41a6c 3350 vec_oprnds->truncate ((i+1)/2);
7d96a007 3351 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3352 stmt, vec_dsts, gsi, slp_node,
3353 VEC_PACK_TRUNC_EXPR,
3354 prev_stmt_info);
3355 }
3356
f1f41a6c 3357 vec_dsts.quick_push (vec_dest);
7d96a007 3358}
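/* Editorial worked example, assuming V4SI/V8HI/V16QI vector modes: a
   two-step int-to-char demotion arrives here with four V4SI operands.
   The loop above packs them pairwise into two V8HI vectors, truncates
   VEC_OPRNDS to those two results, and the recursive call packs them
   with VEC_PACK_TRUNC_EXPR into one V16QI vector, which is recorded in
   SLP_NODE or in the STMT_VINFO_RELATED_STMT chain.  */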
3359
3360
3361/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3362 and VEC_OPRNDS1 (for binary operations). For multi-step conversions, store
3363 the resulting vectors and call the function recursively. */
3364
3365static void
f1f41a6c 3366vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3367 vec<tree> *vec_oprnds1,
7d96a007 3368 gimple stmt, tree vec_dest,
3369 gimple_stmt_iterator *gsi,
3370 enum tree_code code1,
3371 enum tree_code code2, tree decl1,
3372 tree decl2, int op_type)
3373{
3374 int i;
3375 tree vop0, vop1, new_tmp1, new_tmp2;
3376 gimple new_stmt1, new_stmt2;
1e094109 3377 vec<tree> vec_tmp = vNULL;
7d96a007 3378
f1f41a6c 3379 vec_tmp.create (vec_oprnds0->length () * 2);
3380 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
7d96a007 3381 {
3382 if (op_type == binary_op)
f1f41a6c 3383 vop1 = (*vec_oprnds1)[i];
7d96a007 3384 else
3385 vop1 = NULL_TREE;
3386
3387 /* Generate the two halves of promotion operation. */
3388 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3389 op_type, vec_dest, gsi, stmt);
3390 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3391 op_type, vec_dest, gsi, stmt);
3392 if (is_gimple_call (new_stmt1))
3393 {
3394 new_tmp1 = gimple_call_lhs (new_stmt1);
3395 new_tmp2 = gimple_call_lhs (new_stmt2);
3396 }
3397 else
3398 {
3399 new_tmp1 = gimple_assign_lhs (new_stmt1);
3400 new_tmp2 = gimple_assign_lhs (new_stmt2);
3401 }
3402
3403 /* Store the results for the next step. */
f1f41a6c 3404 vec_tmp.quick_push (new_tmp1);
3405 vec_tmp.quick_push (new_tmp2);
7d96a007 3406 }
3407
57fad2bf 3408 vec_oprnds0->release ();
7d96a007 3409 *vec_oprnds0 = vec_tmp;
3410}
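/* Editorial worked example: promoting V8HI operands with code1/code2 =
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR yields two V4SI halves per
   input, so VEC_OPRNDS0 doubles in length on every call.  The WIDEN
   case below invokes this once per conversion step, handing it the
   destination variable for the type produced at that step.  */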
3411
3412
48e1416a 3413/* Check if STMT performs a conversion operation that can be vectorized.
3414 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7d96a007 3415 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
fb85abff 3416 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3417
3418static bool
3419vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3420 gimple *vec_stmt, slp_tree slp_node)
3421{
3422 tree vec_dest;
3423 tree scalar_dest;
7d96a007 3424 tree op0, op1 = NULL_TREE;
fb85abff 3425 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3426 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3427 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3428 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
7d96a007 3429 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
fb85abff 3430 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3431 tree new_temp;
3432 tree def;
3433 gimple def_stmt;
3434 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3435 gimple new_stmt = NULL;
3436 stmt_vec_info prev_stmt_info;
3437 int nunits_in;
3438 int nunits_out;
3439 tree vectype_out, vectype_in;
7d96a007 3440 int ncopies, i, j;
3441 tree lhs_type, rhs_type;
fb85abff 3442 enum { NARROW, NONE, WIDEN } modifier;
1e094109 3443 vec<tree> vec_oprnds0 = vNULL;
3444 vec<tree> vec_oprnds1 = vNULL;
fb85abff 3445 tree vop0;
7d96a007 3446 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3447 int multi_step_cvt = 0;
1e094109 3448 vec<tree> vec_dsts = vNULL;
3449 vec<tree> interm_types = vNULL;
7d96a007 3450 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3451 int op_type;
3754d046 3452 machine_mode rhs_mode;
7d96a007 3453 unsigned short fltsz;
fb85abff 3454
3455 /* Is STMT a vectorizable conversion? */
3456
7d96a007 3457 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
fb85abff 3458 return false;
3459
f083cd24 3460 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
fb85abff 3461 return false;
3462
3463 if (!is_gimple_assign (stmt))
3464 return false;
3465
3466 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3467 return false;
3468
3469 code = gimple_assign_rhs_code (stmt);
7d96a007 3470 if (!CONVERT_EXPR_CODE_P (code)
3471 && code != FIX_TRUNC_EXPR
3472 && code != FLOAT_EXPR
3473 && code != WIDEN_MULT_EXPR
3474 && code != WIDEN_LSHIFT_EXPR)
fb85abff 3475 return false;
3476
7d96a007 3477 op_type = TREE_CODE_LENGTH (code);
3478
fb85abff 3479 /* Check types of lhs and rhs. */
b334cbba 3480 scalar_dest = gimple_assign_lhs (stmt);
7d96a007 3481 lhs_type = TREE_TYPE (scalar_dest);
b334cbba 3482 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3483
fb85abff 3484 op0 = gimple_assign_rhs1 (stmt);
3485 rhs_type = TREE_TYPE (op0);
7d96a007 3486
3487 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3488 && !((INTEGRAL_TYPE_P (lhs_type)
3489 && INTEGRAL_TYPE_P (rhs_type))
3490 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3491 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3492 return false;
3493
3494 if ((INTEGRAL_TYPE_P (lhs_type)
3495 && (TYPE_PRECISION (lhs_type)
3496 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3497 || (INTEGRAL_TYPE_P (rhs_type)
3498 && (TYPE_PRECISION (rhs_type)
3499 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3500 {
6d8fb6cf 3501 if (dump_enabled_p ())
7bd765d4 3502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 3503 "type conversion to/from bit-precision unsupported."
3504 "\n");
7d96a007 3505 return false;
3506 }
3507
b334cbba 3508 /* Check the operands of the operation. */
bed8b93b 3509 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b334cbba 3510 &def_stmt, &def, &dt[0], &vectype_in))
3511 {
6d8fb6cf 3512 if (dump_enabled_p ())
7bd765d4 3513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 3514 "use not simple.\n");
b334cbba 3515 return false;
3516 }
7d96a007 3517 if (op_type == binary_op)
3518 {
3519 bool ok;
3520
3521 op1 = gimple_assign_rhs2 (stmt);
3522 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3523 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3524 OP1. */
3525 if (CONSTANT_CLASS_P (op0))
4c0c783a 3526 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
7d96a007 3527 &def_stmt, &def, &dt[1], &vectype_in);
3528 else
4c0c783a 3529 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
bed8b93b 3530 &def, &dt[1]);
7d96a007 3531
3532 if (!ok)
3533 {
6d8fb6cf 3534 if (dump_enabled_p ())
7bd765d4 3535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 3536 "use not simple.\n");
7d96a007 3537 return false;
3538 }
3539 }
3540
b334cbba 3541 /* If op0 is an external or constant def, use a vector type of
3542 the same size as the output vector type. */
fb85abff 3543 if (!vectype_in)
b334cbba 3544 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
29b41aee 3545 if (vec_stmt)
3546 gcc_assert (vectype_in);
3547 if (!vectype_in)
3548 {
6d8fb6cf 3549 if (dump_enabled_p ())
7d96a007 3550 {
7bd765d4 3551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3552 "no vectype for scalar type ");
3553 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
78bb46f5 3554 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7d96a007 3555 }
29b41aee 3556
3557 return false;
3558 }
fb85abff 3559
b334cbba 3560 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3561 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
7d96a007 3562 if (nunits_in < nunits_out)
fb85abff 3563 modifier = NARROW;
3564 else if (nunits_out == nunits_in)
3565 modifier = NONE;
fb85abff 3566 else
7d96a007 3567 modifier = WIDEN;
fb85abff 3568
282bf14c 3569 /* Multiple types in SLP are handled by creating the appropriate number of
3570 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3571 case of SLP. */
bc937a44 3572 if (slp_node || PURE_SLP_STMT (stmt_info))
fb85abff 3573 ncopies = 1;
7d96a007 3574 else if (modifier == NARROW)
3575 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3576 else
3577 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
48e1416a 3578
fb85abff 3579 /* Sanity check: make sure that at least one copy of the vectorized stmt
3580 needs to be generated. */
3581 gcc_assert (ncopies >= 1);
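  /* Editorial worked example: widening shorts to ints with a
     vectorization factor of 16 gives vectype_in = V8HI (nunits_in = 8)
     and vectype_out = V4SI (nunits_out = 4), hence modifier = WIDEN and
     ncopies = 16 / 8 = 2; the two copies consume two input vectors and
     produce four output vectors.  */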
3582
fb85abff 3583 /* Supportable by target? */
7d96a007 3584 switch (modifier)
fb85abff 3585 {
7d96a007 3586 case NONE:
3587 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3588 return false;
3589 if (supportable_convert_operation (code, vectype_out, vectype_in,
3590 &decl1, &code1))
3591 break;
3592 /* FALLTHRU */
3593 unsupported:
6d8fb6cf 3594 if (dump_enabled_p ())
7bd765d4 3595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 3596 "conversion not supported by target.\n");
fb85abff 3597 return false;
fb85abff 3598
7d96a007 3599 case WIDEN:
3600 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
087dde2d 3601 &code1, &code2, &multi_step_cvt,
3602 &interm_types))
7d96a007 3603 {
3604 /* Binary widening operations can only be supported directly by the
3605 architecture. */
3606 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3607 break;
3608 }
3609
3610 if (code != FLOAT_EXPR
3611 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3612 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3613 goto unsupported;
3614
3615 rhs_mode = TYPE_MODE (rhs_type);
3616 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3617 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3618 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3619 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3620 {
3621 cvt_type
3622 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3623 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3624 if (cvt_type == NULL_TREE)
3625 goto unsupported;
3626
3627 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3628 {
3629 if (!supportable_convert_operation (code, vectype_out,
3630 cvt_type, &decl1, &codecvt1))
3631 goto unsupported;
3632 }
3633 else if (!supportable_widening_operation (code, stmt, vectype_out,
087dde2d 3634 cvt_type, &codecvt1,
3635 &codecvt2, &multi_step_cvt,
7d96a007 3636 &interm_types))
3637 continue;
3638 else
3639 gcc_assert (multi_step_cvt == 0);
3640
3641 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
087dde2d 3642 vectype_in, &code1, &code2,
3643 &multi_step_cvt, &interm_types))
7d96a007 3644 break;
3645 }
3646
3647 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3648 goto unsupported;
3649
3650 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3651 codecvt2 = ERROR_MARK;
3652 else
3653 {
3654 multi_step_cvt++;
f1f41a6c 3655 interm_types.safe_push (cvt_type);
7d96a007 3656 cvt_type = NULL_TREE;
3657 }
3658 break;
3659
3660 case NARROW:
3661 gcc_assert (op_type == unary_op);
3662 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3663 &code1, &multi_step_cvt,
3664 &interm_types))
3665 break;
3666
3667 if (code != FIX_TRUNC_EXPR
3668 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3669 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3670 goto unsupported;
3671
3672 rhs_mode = TYPE_MODE (rhs_type);
3673 cvt_type
3674 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3675 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3676 if (cvt_type == NULL_TREE)
3677 goto unsupported;
3678 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3679 &decl1, &codecvt1))
3680 goto unsupported;
3681 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3682 &code1, &multi_step_cvt,
3683 &interm_types))
3684 break;
3685 goto unsupported;
3686
3687 default:
3688 gcc_unreachable ();
fb85abff 3689 }
3690
3691 if (!vec_stmt) /* transformation not required. */
3692 {
6d8fb6cf 3693 if (dump_enabled_p ())
7bd765d4 3694 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3695 "=== vectorizable_conversion ===\n");
7d96a007 3696 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
5df2530b 3697 {
3698 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4db2b577 3699 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5df2530b 3700 }
7d96a007 3701 else if (modifier == NARROW)
3702 {
3703 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5df2530b 3704 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
7d96a007 3705 }
3706 else
3707 {
3708 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5df2530b 3709 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
7d96a007 3710 }
f1f41a6c 3711 interm_types.release ();
fb85abff 3712 return true;
3713 }
3714
3715 /** Transform. **/
6d8fb6cf 3716 if (dump_enabled_p ())
7bd765d4 3717 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3718 "transform conversion. ncopies = %d.\n", ncopies);
fb85abff 3719
7d96a007 3720 if (op_type == binary_op)
3721 {
3722 if (CONSTANT_CLASS_P (op0))
3723 op0 = fold_convert (TREE_TYPE (op1), op0);
3724 else if (CONSTANT_CLASS_P (op1))
3725 op1 = fold_convert (TREE_TYPE (op0), op1);
3726 }
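  /* Editorial example: for a WIDEN_MULT_EXPR like 2 * (int) s, the
     constant operand is folded above to the type of the non-constant
     one, so both operands are given vector defs of the same vectype.  */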
3727
3728 /* In case of multi-step conversion, we first generate conversion operations
3729 to the intermediate types, and then from those types to the final one.
3730 We create vector destinations for the intermediate types (TYPES) received
3731 from supportable_*_operation, and store them in the correct order
3732 for future use in vect_create_vectorized_*_stmts (). */
f1f41a6c 3733 vec_dsts.create (multi_step_cvt + 1);
3991e956 3734 vec_dest = vect_create_destination_var (scalar_dest,
3735 (cvt_type && modifier == WIDEN)
3736 ? cvt_type : vectype_out);
f1f41a6c 3737 vec_dsts.quick_push (vec_dest);
7d96a007 3738
3739 if (multi_step_cvt)
3740 {
f1f41a6c 3741 for (i = interm_types.length () - 1;
3742 interm_types.iterate (i, &intermediate_type); i--)
7d96a007 3743 {
3744 vec_dest = vect_create_destination_var (scalar_dest,
3745 intermediate_type);
f1f41a6c 3746 vec_dsts.quick_push (vec_dest);
7d96a007 3747 }
3748 }
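  /* Editorial note: VEC_DSTS now holds the destination for the final
     type at index 0 with the intermediate destinations stacked on top,
     so each conversion step can pop (NARROW) or index (WIDEN) the
     destination for the type it produces next.  */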
fb85abff 3749
7d96a007 3750 if (cvt_type)
3991e956 3751 vec_dest = vect_create_destination_var (scalar_dest,
3752 modifier == WIDEN
3753 ? vectype_out : cvt_type);
7d96a007 3754
3755 if (!slp_node)
3756 {
1d4dcacf 3757 if (modifier == WIDEN)
7d96a007 3758 {
9af5ce0c 3759 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
7d96a007 3760 if (op_type == binary_op)
f1f41a6c 3761 vec_oprnds1.create (1);
7d96a007 3762 }
1d4dcacf 3763 else if (modifier == NARROW)
f1f41a6c 3764 vec_oprnds0.create (
3765 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
7d96a007 3766 }
3767 else if (code == WIDEN_LSHIFT_EXPR)
f1f41a6c 3768 vec_oprnds1.create (slp_node->vec_stmts_size);
fb85abff 3769
7d96a007 3770 last_oprnd = op0;
fb85abff 3771 prev_stmt_info = NULL;
3772 switch (modifier)
3773 {
3774 case NONE:
3775 for (j = 0; j < ncopies; j++)
3776 {
fb85abff 3777 if (j == 0)
b0f64919 3778 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3779 -1);
fb85abff 3780 else
3781 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3782
f1f41a6c 3783 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7d96a007 3784 {
3785 /* Arguments are ready, create the new vector stmt. */
3786 if (code1 == CALL_EXPR)
3787 {
3788 new_stmt = gimple_build_call (decl1, 1, vop0);
3789 new_temp = make_ssa_name (vec_dest, new_stmt);
3790 gimple_call_set_lhs (new_stmt, new_temp);
3791 }
3792 else
3793 {
3794 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3795 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3796 vop0, NULL);
3797 new_temp = make_ssa_name (vec_dest, new_stmt);
3798 gimple_assign_set_lhs (new_stmt, new_temp);
3799 }
3800
3801 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3802 if (slp_node)
f1f41a6c 3803 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7d96a007 3804 }
3805
fb85abff 3806 if (j == 0)
3807 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3808 else
3809 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3810 prev_stmt_info = vinfo_for_stmt (new_stmt);
3811 }
3812 break;
3813
3814 case WIDEN:
3815 /* In case the vectorization factor (VF) is bigger than the number
3816 of elements that we can fit in a vectype (nunits), we have to
3817 generate more than one vector stmt, i.e., we need to "unroll"
3818 the vector stmt by a factor VF/nunits. */
3819 for (j = 0; j < ncopies; j++)
3820 {
7d96a007 3821 /* Handle uses. */
fb85abff 3822 if (j == 0)
7d96a007 3823 {
3824 if (slp_node)
3825 {
3826 if (code == WIDEN_LSHIFT_EXPR)
3827 {
3828 unsigned int k;
fb85abff 3829
7d96a007 3830 vec_oprnd1 = op1;
3831 /* Store vec_oprnd1 for every vector stmt to be created
3832 for SLP_NODE. We check during the analysis that all
3833 the shift arguments are the same. */
3834 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
f1f41a6c 3835 vec_oprnds1.quick_push (vec_oprnd1);
7d96a007 3836
3837 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3838 slp_node, -1);
3839 }
3840 else
3841 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3842 &vec_oprnds1, slp_node, -1);
3843 }
3844 else
3845 {
3846 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
f1f41a6c 3847 vec_oprnds0.quick_push (vec_oprnd0);
7d96a007 3848 if (op_type == binary_op)
3849 {
3850 if (code == WIDEN_LSHIFT_EXPR)
3851 vec_oprnd1 = op1;
3852 else
3853 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3854 NULL);
f1f41a6c 3855 vec_oprnds1.quick_push (vec_oprnd1);
7d96a007 3856 }
3857 }
3858 }
fb85abff 3859 else
7d96a007 3860 {
3861 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
f1f41a6c 3862 vec_oprnds0.truncate (0);
3863 vec_oprnds0.quick_push (vec_oprnd0);
7d96a007 3864 if (op_type == binary_op)
3865 {
3866 if (code == WIDEN_LSHIFT_EXPR)
3867 vec_oprnd1 = op1;
3868 else
3869 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3870 vec_oprnd1);
f1f41a6c 3871 vec_oprnds1.truncate (0);
3872 vec_oprnds1.quick_push (vec_oprnd1);
7d96a007 3873 }
3874 }
fb85abff 3875
7d96a007 3876 /* Arguments are ready. Create the new vector stmts. */
3877 for (i = multi_step_cvt; i >= 0; i--)
3878 {
f1f41a6c 3879 tree this_dest = vec_dsts[i];
7d96a007 3880 enum tree_code c1 = code1, c2 = code2;
3881 if (i == 0 && codecvt2 != ERROR_MARK)
3882 {
3883 c1 = codecvt1;
3884 c2 = codecvt2;
3885 }
3886 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3887 &vec_oprnds1,
3888 stmt, this_dest, gsi,
3889 c1, c2, decl1, decl2,
3890 op_type);
3891 }
3892
f1f41a6c 3893 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7d96a007 3894 {
3895 if (cvt_type)
3896 {
3897 if (codecvt1 == CALL_EXPR)
3898 {
3899 new_stmt = gimple_build_call (decl1, 1, vop0);
3900 new_temp = make_ssa_name (vec_dest, new_stmt);
3901 gimple_call_set_lhs (new_stmt, new_temp);
3902 }
3903 else
3904 {
3905 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3906 new_temp = make_ssa_name (vec_dest, NULL);
3907 new_stmt = gimple_build_assign_with_ops (codecvt1,
3908 new_temp,
3909 vop0, NULL);
3910 }
3911
3912 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3913 }
3914 else
3915 new_stmt = SSA_NAME_DEF_STMT (vop0);
3916
3917 if (slp_node)
f1f41a6c 3918 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7d96a007 3919 else
3920 {
3921 if (!prev_stmt_info)
3922 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3923 else
3924 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3925 prev_stmt_info = vinfo_for_stmt (new_stmt);
3926 }
3927 }
fb85abff 3928 }
7d96a007 3929
3930 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
fb85abff 3931 break;
3932
3933 case NARROW:
3934 /* In case the vectorization factor (VF) is bigger than the number
3935 of elements that we can fit in a vectype (nunits), we have to
3936 generate more than one vector stmt, i.e., we need to "unroll"
3937 the vector stmt by a factor VF/nunits. */
3938 for (j = 0; j < ncopies; j++)
3939 {
3940 /* Handle uses. */
7d96a007 3941 if (slp_node)
3942 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3943 slp_node, -1);
fb85abff 3944 else
3945 {
f1f41a6c 3946 vec_oprnds0.truncate (0);
7d96a007 3947 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3948 vect_pow2 (multi_step_cvt) - 1);
fb85abff 3949 }
3950
7d96a007 3951 /* Arguments are ready. Create the new vector stmts. */
3952 if (cvt_type)
f1f41a6c 3953 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7d96a007 3954 {
3955 if (codecvt1 == CALL_EXPR)
3956 {
3957 new_stmt = gimple_build_call (decl1, 1, vop0);
3958 new_temp = make_ssa_name (vec_dest, new_stmt);
3959 gimple_call_set_lhs (new_stmt, new_temp);
3960 }
3961 else
3962 {
3963 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3964 new_temp = make_ssa_name (vec_dest, NULL);
3965 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3966 vop0, NULL);
3967 }
fb85abff 3968
7d96a007 3969 vect_finish_stmt_generation (stmt, new_stmt, gsi);
f1f41a6c 3970 vec_oprnds0[i] = new_temp;
7d96a007 3971 }
fb85abff 3972
7d96a007 3973 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3974 stmt, vec_dsts, gsi,
3975 slp_node, code1,
3976 &prev_stmt_info);
fb85abff 3977 }
3978
3979 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7d96a007 3980 break;
fb85abff 3981 }
3982
f1f41a6c 3983 vec_oprnds0.release ();
3984 vec_oprnds1.release ();
3985 vec_dsts.release ();
3986 interm_types.release ();
fb85abff 3987
3988 return true;
3989}
282bf14c 3990
3991
fb85abff 3992/* Function vectorizable_assignment.
3993
48e1416a 3994 Check if STMT performs an assignment (copy) that can be vectorized.
3995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
fb85abff 3996 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3998
3999static bool
4000vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4001 gimple *vec_stmt, slp_tree slp_node)
4002{
4003 tree vec_dest;
4004 tree scalar_dest;
4005 tree op;
4006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4007 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4008 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4009 tree new_temp;
4010 tree def;
4011 gimple def_stmt;
4012 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
99f61dd0 4013 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
fb85abff 4014 int ncopies;
06e40850 4015 int i, j;
1e094109 4016 vec<tree> vec_oprnds = vNULL;
fb85abff 4017 tree vop;
37545e54 4018 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
06e40850 4019 gimple new_stmt = NULL;
4020 stmt_vec_info prev_stmt_info = NULL;
99f61dd0 4021 enum tree_code code;
4022 tree vectype_in;
fb85abff 4023
4024 /* Multiple types in SLP are handled by creating the appropriate number of
4025 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4026 case of SLP. */
bc937a44 4027 if (slp_node || PURE_SLP_STMT (stmt_info))
fb85abff 4028 ncopies = 1;
4029 else
4030 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4031
4032 gcc_assert (ncopies >= 1);
fb85abff 4033
37545e54 4034 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
fb85abff 4035 return false;
4036
f083cd24 4037 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
fb85abff 4038 return false;
4039
4040 /* Is vectorizable assignment? */
4041 if (!is_gimple_assign (stmt))
4042 return false;
4043
4044 scalar_dest = gimple_assign_lhs (stmt);
4045 if (TREE_CODE (scalar_dest) != SSA_NAME)
4046 return false;
4047
99f61dd0 4048 code = gimple_assign_rhs_code (stmt);
fb85abff 4049 if (gimple_assign_single_p (stmt)
99f61dd0 4050 || code == PAREN_EXPR
4051 || CONVERT_EXPR_CODE_P (code))
fb85abff 4052 op = gimple_assign_rhs1 (stmt);
4053 else
4054 return false;
4055
fecf066a 4056 if (code == VIEW_CONVERT_EXPR)
4057 op = TREE_OPERAND (op, 0);
4058
bed8b93b 4059 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
99f61dd0 4060 &def_stmt, &def, &dt[0], &vectype_in))
fb85abff 4061 {
6d8fb6cf 4062 if (dump_enabled_p ())
7bd765d4 4063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4064 "use not simple.\n");
fb85abff 4065 return false;
4066 }
4067
99f61dd0 4068 /* We can handle NOP_EXPR conversions that do not change the number
4069 of elements or the vector size. */
fecf066a 4070 if ((CONVERT_EXPR_CODE_P (code)
4071 || code == VIEW_CONVERT_EXPR)
99f61dd0 4072 && (!vectype_in
4073 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4074 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4075 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4076 return false;
4077
6960a794 4078 /* We do not handle bit-precision changes. */
4079 if ((CONVERT_EXPR_CODE_P (code)
4080 || code == VIEW_CONVERT_EXPR)
4081 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4082 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4083 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4084 || ((TYPE_PRECISION (TREE_TYPE (op))
4085 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4086 /* But a conversion that does not change the bit-pattern is ok. */
4087 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4088 > TYPE_PRECISION (TREE_TYPE (op)))
4089 && TYPE_UNSIGNED (TREE_TYPE (op))))
4090 {
6d8fb6cf 4091 if (dump_enabled_p ())
7bd765d4 4092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4093 "type conversion to/from bit-precision "
78bb46f5 4094 "unsupported.\n");
6960a794 4095 return false;
4096 }
4097
fb85abff 4098 if (!vec_stmt) /* transformation not required. */
4099 {
4100 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6d8fb6cf 4101 if (dump_enabled_p ())
7bd765d4 4102 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4103 "=== vectorizable_assignment ===\n");
4db2b577 4104 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
fb85abff 4105 return true;
4106 }
4107
4108 /** Transform. **/
6d8fb6cf 4109 if (dump_enabled_p ())
78bb46f5 4110 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
fb85abff 4111
4112 /* Handle def. */
4113 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4114
4115 /* Handle use. */
06e40850 4116 for (j = 0; j < ncopies; j++)
fb85abff 4117 {
06e40850 4118 /* Handle uses. */
4119 if (j == 0)
b0f64919 4120 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
06e40850 4121 else
4122 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4123
4124 /* Arguments are ready. Create the new vector stmt. */
f1f41a6c 4125 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
06e40850 4126 {
fecf066a 4127 if (CONVERT_EXPR_CODE_P (code)
4128 || code == VIEW_CONVERT_EXPR)
3ae54e9b 4129 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
06e40850 4130 new_stmt = gimple_build_assign (vec_dest, vop);
4131 new_temp = make_ssa_name (vec_dest, new_stmt);
4132 gimple_assign_set_lhs (new_stmt, new_temp);
4133 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4134 if (slp_node)
f1f41a6c 4135 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
06e40850 4136 }
fb85abff 4137
4138 if (slp_node)
06e40850 4139 continue;
4140
4141 if (j == 0)
4142 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4143 else
4144 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4145
4146 prev_stmt_info = vinfo_for_stmt (new_stmt);
4147 }
48e1416a 4148
f1f41a6c 4149 vec_oprnds.release ();
fb85abff 4150 return true;
4151}
4152
09e31a48 4153
45eea33f 4154/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4155 either as a shift by a scalar or by a vector. */
4156
4157bool
4158vect_supportable_shift (enum tree_code code, tree scalar_type)
4159{
4160
3754d046 4161 machine_mode vec_mode;
45eea33f 4162 optab optab;
4163 int icode;
4164 tree vectype;
4165
4166 vectype = get_vectype_for_scalar_type (scalar_type);
4167 if (!vectype)
4168 return false;
4169
4170 optab = optab_for_tree_code (code, vectype, optab_scalar);
4171 if (!optab
4172 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4173 {
4174 optab = optab_for_tree_code (code, vectype, optab_vector);
4175 if (!optab
4176 || (optab_handler (optab, TYPE_MODE (vectype))
4177 == CODE_FOR_nothing))
4178 return false;
4179 }
4180
4181 vec_mode = TYPE_MODE (vectype);
4182 icode = (int) optab_handler (optab, vec_mode);
4183 if (icode == CODE_FOR_nothing)
4184 return false;
4185
4186 return true;
4187}
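/* Editorial usage sketch (OPRND0 is a hypothetical operand):

     if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... some vector shift (by scalar or by vector) exists ...

   The scalar-amount optab is probed first; the vector-amount optab is
   consulted only when no vector-shift-by-scalar handler exists.  */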
4188
4189
09e31a48 4190/* Function vectorizable_shift.
4191
4192 Check if STMT performs a shift operation that can be vectorized.
4193 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4194 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4195 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4196
4197static bool
4198vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4199 gimple *vec_stmt, slp_tree slp_node)
4200{
4201 tree vec_dest;
4202 tree scalar_dest;
4203 tree op0, op1 = NULL;
4204 tree vec_oprnd1 = NULL_TREE;
4205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4206 tree vectype;
4207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4208 enum tree_code code;
3754d046 4209 machine_mode vec_mode;
09e31a48 4210 tree new_temp;
4211 optab optab;
4212 int icode;
3754d046 4213 machine_mode optab_op2_mode;
09e31a48 4214 tree def;
4215 gimple def_stmt;
4216 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4217 gimple new_stmt = NULL;
4218 stmt_vec_info prev_stmt_info;
4219 int nunits_in;
4220 int nunits_out;
4221 tree vectype_out;
702ee50d 4222 tree op1_vectype;
09e31a48 4223 int ncopies;
4224 int j, i;
1e094109 4225 vec<tree> vec_oprnds0 = vNULL;
4226 vec<tree> vec_oprnds1 = vNULL;
09e31a48 4227 tree vop0, vop1;
4228 unsigned int k;
c344bf60 4229 bool scalar_shift_arg = true;
09e31a48 4230 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4231 int vf;
4232
4233 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4234 return false;
4235
4236 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4237 return false;
4238
4239 /* Is STMT a vectorizable shift operation? */
4240 if (!is_gimple_assign (stmt))
4241 return false;
4242
4243 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4244 return false;
4245
4246 code = gimple_assign_rhs_code (stmt);
4247
4248 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4249 || code == RROTATE_EXPR))
4250 return false;
4251
4252 scalar_dest = gimple_assign_lhs (stmt);
4253 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6960a794 4254 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4255 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4256 {
6d8fb6cf 4257 if (dump_enabled_p ())
7bd765d4 4258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4259 "bit-precision shifts not supported.\n");
6960a794 4260 return false;
4261 }
09e31a48 4262
4263 op0 = gimple_assign_rhs1 (stmt);
bed8b93b 4264 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
09e31a48 4265 &def_stmt, &def, &dt[0], &vectype))
4266 {
6d8fb6cf 4267 if (dump_enabled_p ())
7bd765d4 4268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4269 "use not simple.\n");
09e31a48 4270 return false;
4271 }
4272 /* If op0 is an external or constant def, use a vector type with
4273 the same size as the output vector type. */
4274 if (!vectype)
4275 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4276 if (vec_stmt)
4277 gcc_assert (vectype);
4278 if (!vectype)
4279 {
6d8fb6cf 4280 if (dump_enabled_p ())
7bd765d4 4281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4282 "no vectype for scalar type\n");
09e31a48 4283 return false;
4284 }
4285
4286 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4287 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4288 if (nunits_out != nunits_in)
4289 return false;
4290
4291 op1 = gimple_assign_rhs2 (stmt);
bed8b93b 4292 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4293 &def, &dt[1], &op1_vectype))
09e31a48 4294 {
6d8fb6cf 4295 if (dump_enabled_p ())
7bd765d4 4296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4297 "use not simple.\n");
09e31a48 4298 return false;
4299 }
4300
4301 if (loop_vinfo)
4302 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4303 else
4304 vf = 1;
4305
4306 /* Multiple types in SLP are handled by creating the appropriate number of
4307 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4308 case of SLP. */
bc937a44 4309 if (slp_node || PURE_SLP_STMT (stmt_info))
09e31a48 4310 ncopies = 1;
4311 else
4312 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4313
4314 gcc_assert (ncopies >= 1);
4315
4316 /* Determine whether the shift amount is a vector or a scalar. If the
4317 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4318
c344bf60 4319 if (dt[1] == vect_internal_def && !slp_node)
4320 scalar_shift_arg = false;
4321 else if (dt[1] == vect_constant_def
4322 || dt[1] == vect_external_def
4323 || dt[1] == vect_internal_def)
4324 {
4325 /* In SLP, we need to check whether the shift count is the same
4326 for all the statements; in loops, if it is a constant or
4327 invariant, it is always a scalar shift. */
4328 if (slp_node)
4329 {
f1f41a6c 4330 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
c344bf60 4331 gimple slpstmt;
4332
f1f41a6c 4333 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
c344bf60 4334 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4335 scalar_shift_arg = false;
4336 }
4337 }
4338 else
4339 {
6d8fb6cf 4340 if (dump_enabled_p ())
7bd765d4 4341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4342 "operand mode requires invariant argument.\n");
c344bf60 4343 return false;
4344 }
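  /* Editorial example: for x[i] << 3 the count is a vect_constant_def,
     SCALAR_SHIFT_ARG stays true and the vector/scalar optab is preferred
     below; for x[i] << y[i] in a loop the count is a vect_internal_def,
     SCALAR_SHIFT_ARG becomes false and only the vector/vector optab can
     be used.  */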
4345
09e31a48 4346 /* Vector shifted by vector. */
c344bf60 4347 if (!scalar_shift_arg)
09e31a48 4348 {
4349 optab = optab_for_tree_code (code, vectype, optab_vector);
6d8fb6cf 4350 if (dump_enabled_p ())
7bd765d4 4351 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4352 "vector/vector shift/rotate found.\n");
7bd765d4 4353
f381ff7c 4354 if (!op1_vectype)
4355 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4356 if (op1_vectype == NULL_TREE
4357 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
702ee50d 4358 {
6d8fb6cf 4359 if (dump_enabled_p ())
7bd765d4 4360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4361 "unusable type for last operand in"
78bb46f5 4362 " vector/vector shift/rotate.\n");
702ee50d 4363 return false;
4364 }
09e31a48 4365 }
4366 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
4367 see if it has a vector-shifted-by-vector insn. */
c344bf60 4368 else
09e31a48 4369 {
4370 optab = optab_for_tree_code (code, vectype, optab_scalar);
4371 if (optab
4372 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4373 {
6d8fb6cf 4374 if (dump_enabled_p ())
7bd765d4 4375 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4376 "vector/scalar shift/rotate found.\n");
09e31a48 4377 }
4378 else
4379 {
4380 optab = optab_for_tree_code (code, vectype, optab_vector);
4381 if (optab
4382 && (optab_handler (optab, TYPE_MODE (vectype))
4383 != CODE_FOR_nothing))
4384 {
c344bf60 4385 scalar_shift_arg = false;
4386
6d8fb6cf 4387 if (dump_enabled_p ())
7bd765d4 4388 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4389 "vector/vector shift/rotate found.\n");
09e31a48 4390
4391 /* Unlike the other binary operators, shifts/rotates take
4392 an int rhs instead of one of the same type as the lhs,
4393 so make sure the scalar is the right type if we are
f381ff7c 4394 dealing with vectors of long long/long/short/char. */
09e31a48 4395 if (dt[1] == vect_constant_def)
4396 op1 = fold_convert (TREE_TYPE (vectype), op1);
f381ff7c 4397 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4398 TREE_TYPE (op1)))
4399 {
4400 if (slp_node
4401 && TYPE_MODE (TREE_TYPE (vectype))
4402 != TYPE_MODE (TREE_TYPE (op1)))
4403 {
6d8fb6cf 4404 if (dump_enabled_p ())
7bd765d4 4405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4406 "unusable type for last operand in"
78bb46f5 4407 " vector/vector shift/rotate.\n");
f381ff7c 4408 return false;
4409 }
4410 if (vec_stmt && !slp_node)
4411 {
4412 op1 = fold_convert (TREE_TYPE (vectype), op1);
4413 op1 = vect_init_vector (stmt, op1,
4414 TREE_TYPE (vectype), NULL);
4415 }
4416 }
09e31a48 4417 }
4418 }
4419 }
09e31a48 4420
4421 /* Supportable by target? */
4422 if (!optab)
4423 {
6d8fb6cf 4424 if (dump_enabled_p ())
7bd765d4 4425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4426 "no optab.\n");
09e31a48 4427 return false;
4428 }
4429 vec_mode = TYPE_MODE (vectype);
4430 icode = (int) optab_handler (optab, vec_mode);
4431 if (icode == CODE_FOR_nothing)
4432 {
6d8fb6cf 4433 if (dump_enabled_p ())
7bd765d4 4434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4435 "op not supported by target.\n");
09e31a48 4436 /* Check only during analysis. */
4437 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4438 || (vf < vect_min_worthwhile_factor (code)
4439 && !vec_stmt))
4440 return false;
6d8fb6cf 4441 if (dump_enabled_p ())
78bb46f5 4442 dump_printf_loc (MSG_NOTE, vect_location,
4443 "proceeding using word mode.\n");
09e31a48 4444 }
4445
4446 /* Worthwhile without SIMD support? Check only during analysis. */
4447 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4448 && vf < vect_min_worthwhile_factor (code)
4449 && !vec_stmt)
4450 {
6d8fb6cf 4451 if (dump_enabled_p ())
7bd765d4 4452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4453 "not worthwhile without SIMD support.\n");
09e31a48 4454 return false;
4455 }
4456
4457 if (!vec_stmt) /* transformation not required. */
4458 {
4459 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6d8fb6cf 4460 if (dump_enabled_p ())
78bb46f5 4461 dump_printf_loc (MSG_NOTE, vect_location,
4462 "=== vectorizable_shift ===\n");
4db2b577 4463 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
09e31a48 4464 return true;
4465 }
4466
4467 /** Transform. **/
4468
6d8fb6cf 4469 if (dump_enabled_p ())
7bd765d4 4470 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4471 "transform binary/unary operation.\n");
09e31a48 4472
4473 /* Handle def. */
4474 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4475
09e31a48 4476 prev_stmt_info = NULL;
4477 for (j = 0; j < ncopies; j++)
4478 {
4479 /* Handle uses. */
4480 if (j == 0)
4481 {
4482 if (scalar_shift_arg)
4483 {
4484 /* Vector shl and shr insn patterns can be defined with scalar
4485 operand 2 (shift operand). In this case, use constant or loop
4486 invariant op1 directly, without extending it to vector mode
4487 first. */
4488 optab_op2_mode = insn_data[icode].operand[2].mode;
4489 if (!VECTOR_MODE_P (optab_op2_mode))
4490 {
6d8fb6cf 4491 if (dump_enabled_p ())
7bd765d4 4492 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4493 "operand 1 using scalar mode.\n");
09e31a48 4494 vec_oprnd1 = op1;
87c0beb4 4495 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
f1f41a6c 4496 vec_oprnds1.quick_push (vec_oprnd1);
09e31a48 4497 if (slp_node)
4498 {
4499 /* Store vec_oprnd1 for every vector stmt to be created
4500 for SLP_NODE. We check during the analysis that all
4501 the shift arguments are the same.
4502 TODO: Allow different constants for different vector
4503 stmts generated for an SLP instance. */
4504 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
f1f41a6c 4505 vec_oprnds1.quick_push (vec_oprnd1);
09e31a48 4506 }
4507 }
4508 }
4509
4510 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4511 (a special case for certain kinds of vector shifts); otherwise,
4512 operand 1 should be of a vector type (the usual case). */
4513 if (vec_oprnd1)
4514 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
b0f64919 4515 slp_node, -1);
09e31a48 4516 else
4517 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
b0f64919 4518 slp_node, -1);
09e31a48 4519 }
4520 else
4521 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4522
4523 /* Arguments are ready. Create the new vector stmt. */
f1f41a6c 4524 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
09e31a48 4525 {
f1f41a6c 4526 vop1 = vec_oprnds1[i];
09e31a48 4527 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4528 new_temp = make_ssa_name (vec_dest, new_stmt);
4529 gimple_assign_set_lhs (new_stmt, new_temp);
4530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4531 if (slp_node)
f1f41a6c 4532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
09e31a48 4533 }
4534
4535 if (slp_node)
4536 continue;
4537
4538 if (j == 0)
4539 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4540 else
4541 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4542 prev_stmt_info = vinfo_for_stmt (new_stmt);
4543 }
4544
f1f41a6c 4545 vec_oprnds0.release ();
4546 vec_oprnds1.release ();
09e31a48 4547
4548 return true;
4549}
4550
4551
fb85abff 4552/* Function vectorizable_operation.
4553
b9be572e 4554 Check if STMT performs a binary, unary or ternary operation that can
4555 be vectorized.
48e1416a 4556 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
fb85abff 4557 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4558 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4559
4560static bool
4561vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4562 gimple *vec_stmt, slp_tree slp_node)
4563{
ebf4f764 4564 tree vec_dest;
fb85abff 4565 tree scalar_dest;
b9be572e 4566 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
fb85abff 4567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebf4f764 4568 tree vectype;
fb85abff 4569 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4570 enum tree_code code;
3754d046 4571 machine_mode vec_mode;
fb85abff 4572 tree new_temp;
4573 int op_type;
ebf4f764 4574 optab optab;
fb85abff 4575 int icode;
fb85abff 4576 tree def;
4577 gimple def_stmt;
b9be572e 4578 enum vect_def_type dt[3]
4579 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
fb85abff 4580 gimple new_stmt = NULL;
4581 stmt_vec_info prev_stmt_info;
b334cbba 4582 int nunits_in;
fb85abff 4583 int nunits_out;
4584 tree vectype_out;
4585 int ncopies;
4586 int j, i;
1e094109 4587 vec<tree> vec_oprnds0 = vNULL;
4588 vec<tree> vec_oprnds1 = vNULL;
4589 vec<tree> vec_oprnds2 = vNULL;
b9be572e 4590 tree vop0, vop1, vop2;
37545e54 4591 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4592 int vf;
4593
37545e54 4594 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
fb85abff 4595 return false;
4596
f083cd24 4597 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
fb85abff 4598 return false;
4599
4600 /* Is STMT a vectorizable binary/unary/ternary operation? */
4601 if (!is_gimple_assign (stmt))
4602 return false;
4603
4604 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4605 return false;
4606
fb85abff 4607 code = gimple_assign_rhs_code (stmt);
4608
4609 /* For pointer addition, we should use the normal plus for
4610 the vector addition. */
4611 if (code == POINTER_PLUS_EXPR)
4612 code = PLUS_EXPR;
4613
4614 /* Support only unary, binary or ternary operations. */
4615 op_type = TREE_CODE_LENGTH (code);
b9be572e 4616 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
fb85abff 4617 {
6d8fb6cf 4618 if (dump_enabled_p ())
7bd765d4 4619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4620 "num. args = %d (not unary/binary/ternary op).\n",
7bd765d4 4621 op_type);
fb85abff 4622 return false;
4623 }
4624
b334cbba 4625 scalar_dest = gimple_assign_lhs (stmt);
4626 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4627
6960a794 4628 /* Most operations cannot handle bit-precision types without extra
4629 truncations. */
4630 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4631 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4632 /* Exceptions are bitwise binary operations. */
4633 && code != BIT_IOR_EXPR
4634 && code != BIT_XOR_EXPR
4635 && code != BIT_AND_EXPR)
4636 {
6d8fb6cf 4637 if (dump_enabled_p ())
7bd765d4 4638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4639 "bit-precision arithmetic not supported.\n");
6960a794 4640 return false;
4641 }
4642
fb85abff 4643 op0 = gimple_assign_rhs1 (stmt);
bed8b93b 4644 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
b334cbba 4645 &def_stmt, &def, &dt[0], &vectype))
fb85abff 4646 {
6d8fb6cf 4647 if (dump_enabled_p ())
7bd765d4 4648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4649 "use not simple.\n");
fb85abff 4650 return false;
4651 }
b334cbba 4652 /* If op0 is an external or constant def, use a vector type with
4653 the same size as the output vector type. */
4654 if (!vectype)
4655 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
29b41aee 4656 if (vec_stmt)
4657 gcc_assert (vectype);
4658 if (!vectype)
4659 {
6d8fb6cf 4660 if (dump_enabled_p ())
29b41aee 4661 {
7bd765d4 4662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4663 "no vectype for scalar type ");
4664 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4665 TREE_TYPE (op0));
78bb46f5 4666 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
29b41aee 4667 }
4668
4669 return false;
4670 }
b334cbba 4671
4672 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4673 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4674 if (nunits_out != nunits_in)
4675 return false;
fb85abff 4676
b9be572e 4677 if (op_type == binary_op || op_type == ternary_op)
fb85abff 4678 {
4679 op1 = gimple_assign_rhs2 (stmt);
bed8b93b 4680 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4681 &def, &dt[1]))
fb85abff 4682 {
6d8fb6cf 4683 if (dump_enabled_p ())
7bd765d4 4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4685 "use not simple.\n");
fb85abff 4686 return false;
4687 }
4688 }
b9be572e 4689 if (op_type == ternary_op)
4690 {
4691 op2 = gimple_assign_rhs3 (stmt);
bed8b93b 4692 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4693 &def, &dt[2]))
b9be572e 4694 {
6d8fb6cf 4695 if (dump_enabled_p ())
7bd765d4 4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4697 "use not simple.\n");
b9be572e 4698 return false;
4699 }
4700 }
fb85abff 4701
b334cbba 4702 if (loop_vinfo)
4703 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4704 else
4705 vf = 1;
4706
4707 /* Multiple types in SLP are handled by creating the appropriate number of
282bf14c 4708 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b334cbba 4709 case of SLP. */
bc937a44 4710 if (slp_node || PURE_SLP_STMT (stmt_info))
b334cbba 4711 ncopies = 1;
4712 else
4713 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4714
4715 gcc_assert (ncopies >= 1);
4716
09e31a48 4717 /* Shifts are handled in vectorizable_shift (). */
fb85abff 4718 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4719 || code == RROTATE_EXPR)
09e31a48 4720 return false;
fb85abff 4721
fb85abff 4722 /* Supportable by target? */
ebf4f764 4723
4724 vec_mode = TYPE_MODE (vectype);
4725 if (code == MULT_HIGHPART_EXPR)
fb85abff 4726 {
ebf4f764 4727 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
d386876e 4728 icode = LAST_INSN_CODE;
ebf4f764 4729 else
4730 icode = CODE_FOR_nothing;
fb85abff 4731 }
ebf4f764 4732 else
4733 {
4734 optab = optab_for_tree_code (code, vectype, optab_default);
4735 if (!optab)
3af51fe9 4736 {
6d8fb6cf 4737 if (dump_enabled_p ())
7bd765d4 4738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4739 "no optab.\n");
ebf4f764 4740 return false;
3af51fe9 4741 }
ebf4f764 4742 icode = (int) optab_handler (optab, vec_mode);
3af51fe9 4743 }
4744
fb85abff 4745 if (icode == CODE_FOR_nothing)
4746 {
6d8fb6cf 4747 if (dump_enabled_p ())
7bd765d4 4748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4749 "op not supported by target.\n");
fb85abff 4750 /* Check only during analysis. */
4751 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3af51fe9 4752 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
fb85abff 4753 return false;
6d8fb6cf 4754 if (dump_enabled_p ())
78bb46f5 4755 dump_printf_loc (MSG_NOTE, vect_location,
4756 "proceeding using word mode.\n");
0eee81bc 4757 }
4758
7d96a007 4759 /* Worthwhile without SIMD support? Check only during analysis. */
3af51fe9 4760 if (!VECTOR_MODE_P (vec_mode)
4761 && !vec_stmt
4762 && vf < vect_min_worthwhile_factor (code))
29b41aee 4763 {
6d8fb6cf 4764 if (dump_enabled_p ())
7bd765d4 4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 4766 "not worthwhile without SIMD support.\n");
ff0a55b7 4767 return false;
29b41aee 4768 }
fb85abff 4769
fb85abff 4770 if (!vec_stmt) /* transformation not required. */
4771 {
7d96a007 4772 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6d8fb6cf 4773 if (dump_enabled_p ())
7bd765d4 4774 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4775 "=== vectorizable_operation ===\n");
4db2b577 4776 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
fb85abff 4777 return true;
4778 }
4779
4780 /** Transform. **/
4781
6d8fb6cf 4782 if (dump_enabled_p ())
7bd765d4 4783 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 4784 "transform binary/unary operation.\n");
0eee81bc 4785
fb85abff 4786 /* Handle def. */
ebf4f764 4787 vec_dest = vect_create_destination_var (scalar_dest, vectype);
48e1416a 4788
fb85abff 4789 /* In case the vectorization factor (VF) is bigger than the number
4790 of elements that we can fit in a vectype (nunits), we have to generate
4791 more than one vector stmt - i.e - we need to "unroll" the
7d96a007 4792 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4793 from one copy of the vector stmt to the next, in the field
4794 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4795 stages to find the correct vector defs to be used when vectorizing
4796 stmts that use the defs of the current stmt. The example below
4797 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4798 we need to create 4 vectorized stmts):
4799
4800 before vectorization:
4801 RELATED_STMT VEC_STMT
4802 S1: x = memref - -
4803 S2: z = x + 1 - -
4804
4805 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4806 there):
4807 RELATED_STMT VEC_STMT
4808 VS1_0: vx0 = memref0 VS1_1 -
4809 VS1_1: vx1 = memref1 VS1_2 -
4810 VS1_2: vx2 = memref2 VS1_3 -
4811 VS1_3: vx3 = memref3 - -
4812 S1: x = load - VS1_0
4813 S2: z = x + 1 - -
4814
4815 step2: vectorize stmt S2 (done here):
4816 To vectorize stmt S2 we first need to find the relevant vector
4817 def for the first operand 'x'. This is, as usual, obtained from
4818 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4819 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4820 relevant vector def 'vx0'. Having found 'vx0' we can generate
4821 the vector stmt VS2_0, and as usual, record it in the
4822 STMT_VINFO_VEC_STMT of stmt S2.
4823 When creating the second copy (VS2_1), we obtain the relevant vector
4824 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4825 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4826 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4827 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4828 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4829 chain of stmts and pointers:
4830 RELATED_STMT VEC_STMT
4831 VS1_0: vx0 = memref0 VS1_1 -
4832 VS1_1: vx1 = memref1 VS1_2 -
4833 VS1_2: vx2 = memref2 VS1_3 -
4834 VS1_3: vx3 = memref3 - -
4835 S1: x = load - VS1_0
4836 VS2_0: vz0 = vx0 + v1 VS2_1 -
4837 VS2_1: vz1 = vx1 + v1 VS2_2 -
4838 VS2_2: vz2 = vx2 + v1 VS2_3 -
4839 VS2_3: vz3 = vx3 + v1 - -
4840 S2: z = x + 1 - VS2_0 */
fb85abff 4841
4842 prev_stmt_info = NULL;
4843 for (j = 0; j < ncopies; j++)
4844 {
4845 /* Handle uses. */
4846 if (j == 0)
7d96a007 4847 {
4848 if (op_type == binary_op || op_type == ternary_op)
4849 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4850 slp_node, -1);
4851 else
4852 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4853 slp_node, -1);
4854 if (op_type == ternary_op)
6083c152 4855 {
f1f41a6c 4856 vec_oprnds2.create (1);
4857 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4858 stmt,
4859 NULL));
6083c152 4860 }
7d96a007 4861 }
fb85abff 4862 else
7d96a007 4863 {
4864 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4865 if (op_type == ternary_op)
4866 {
f1f41a6c 4867 tree vec_oprnd = vec_oprnds2.pop ();
4868 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4869 vec_oprnd));
7d96a007 4870 }
4871 }
4872
4873 /* Arguments are ready. Create the new vector stmt. */
f1f41a6c 4874 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
fb85abff 4875 {
7d96a007 4876 vop1 = ((op_type == binary_op || op_type == ternary_op)
f1f41a6c 4877 ? vec_oprnds1[i] : NULL_TREE);
7d96a007 4878 vop2 = ((op_type == ternary_op)
f1f41a6c 4879 ? vec_oprnds2[i] : NULL_TREE);
446e85eb 4880 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4881 vop0, vop1, vop2);
7d96a007 4882 new_temp = make_ssa_name (vec_dest, new_stmt);
4883 gimple_assign_set_lhs (new_stmt, new_temp);
4884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4885 if (slp_node)
f1f41a6c 4886 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
fb85abff 4887 }
4888
7d96a007 4889 if (slp_node)
4890 continue;
4891
4892 if (j == 0)
4893 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4894 else
4895 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4896 prev_stmt_info = vinfo_for_stmt (new_stmt);
fb85abff 4897 }
4898
f1f41a6c 4899 vec_oprnds0.release ();
4900 vec_oprnds1.release ();
4901 vec_oprnds2.release ();
fb85abff 4902
fb85abff 4903 return true;
4904}
4905
23e1875f 4906/* A helper function to ensure data reference DR's base alignment
4907 for STMT_INFO. */
4908
4909static void
4910ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4911{
4912 if (!dr->aux)
4913 return;
4914
4915 if (((dataref_aux *)dr->aux)->base_misaligned)
4916 {
4917 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4918 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4919
4920 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4921 DECL_USER_ALIGN (base_decl) = 1;
4922 ((dataref_aux *)dr->aux)->base_misaligned = false;
4923 }
4924}
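/* Editorial note: raising DECL_ALIGN of the base declaration to
   TYPE_ALIGN (vectype) and setting DECL_USER_ALIGN makes a misaligned
   base aligned once and for all, so later accesses through DR can use
   aligned vector loads and stores.  */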
4925
fb85abff 4926
ae70175f 4927/* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4928 reversal of the vector elements. If that is impossible to do,
4929 return NULL. */
4930
4931static tree
4932perm_mask_for_reverse (tree vectype)
4933{
4934 int i, nunits;
4935 unsigned char *sel;
4936
4937 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4938 sel = XALLOCAVEC (unsigned char, nunits);
4939
4940 for (i = 0; i < nunits; ++i)
4941 sel[i] = nunits - 1 - i;
4942
4943 return vect_gen_perm_mask (vectype, sel);
4944}
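/* Editorial worked example: for a V4SI vectype the loop above fills
   sel = { 3, 2, 1, 0 }, so vect_gen_perm_mask builds a VEC_PERM_EXPR
   mask selecting the elements in reverse order; it returns NULL_TREE
   if the target cannot perform that permutation.  */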
4945
fb85abff 4946/* Function vectorizable_store.
4947
48e1416a 4948 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4949 can be vectorized.
4950 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
fb85abff 4951 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4952 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4953
4954static bool
4955vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
23e1875f 4956 slp_tree slp_node)
fb85abff 4957{
4958 tree scalar_dest;
4959 tree data_ref;
4960 tree op;
4961 tree vec_oprnd = NULL_TREE;
4962 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4963 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4964 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
94b7b4dd 4965 tree elem_type;
fb85abff 4966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 4967 struct loop *loop = NULL;
3754d046 4968 machine_mode vec_mode;
fb85abff 4969 tree dummy;
4970 enum dr_alignment_support alignment_support_scheme;
4971 tree def;
4972 gimple def_stmt;
4973 enum vect_def_type dt;
4974 stmt_vec_info prev_stmt_info = NULL;
4975 tree dataref_ptr = NULL_TREE;
3d483a94 4976 tree dataref_offset = NULL_TREE;
5c90e7b6 4977 gimple ptr_incr = NULL;
fb85abff 4978 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4979 int ncopies;
4980 int j;
4981 gimple next_stmt, first_stmt = NULL;
ee612634 4982 bool grouped_store = false;
94b7b4dd 4983 bool store_lanes_p = false;
fb85abff 4984 unsigned int group_size, i;
1e094109 4985 vec<tree> dr_chain = vNULL;
4986 vec<tree> oprnds = vNULL;
4987 vec<tree> result_chain = vNULL;
fb85abff 4988 bool inv_p;
ae70175f 4989 bool negative = false;
4990 tree offset = NULL_TREE;
1e094109 4991 vec<tree> vec_oprnds = vNULL;
fb85abff 4992 bool slp = (slp_node != NULL);
fb85abff 4993 unsigned int vec_num;
37545e54 4994 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
94b7b4dd 4995 tree aggr_type;
37545e54 4996
4997 if (loop_vinfo)
4998 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 4999
5000 /* Multiple types in SLP are handled by creating the appropriate number of
5001 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5002 case of SLP. */
bc937a44 5003 if (slp || PURE_SLP_STMT (stmt_info))
fb85abff 5004 ncopies = 1;
5005 else
5006 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5007
5008 gcc_assert (ncopies >= 1);
5009
5010 /* FORNOW. This restriction should be relaxed. */
37545e54 5011 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
fb85abff 5012 {
6d8fb6cf 5013 if (dump_enabled_p ())
7bd765d4 5014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5015 "multiple types in nested loop.\n");
fb85abff 5016 return false;
5017 }
5018
37545e54 5019 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
fb85abff 5020 return false;
5021
f083cd24 5022 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
fb85abff 5023 return false;
5024
5025 /* Is vectorizable store? */
5026
5027 if (!is_gimple_assign (stmt))
5028 return false;
5029
5030 scalar_dest = gimple_assign_lhs (stmt);
d6152abc 5031 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5032 && is_pattern_stmt_p (stmt_info))
5033 scalar_dest = TREE_OPERAND (scalar_dest, 0);
fb85abff 5034 if (TREE_CODE (scalar_dest) != ARRAY_REF
19bacd59 5035 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
fb85abff 5036 && TREE_CODE (scalar_dest) != INDIRECT_REF
10095225 5037 && TREE_CODE (scalar_dest) != COMPONENT_REF
5038 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
182cf5a9 5039 && TREE_CODE (scalar_dest) != REALPART_EXPR
5040 && TREE_CODE (scalar_dest) != MEM_REF)
fb85abff 5041 return false;
5042
5043 gcc_assert (gimple_assign_single_p (stmt));
5044 op = gimple_assign_rhs1 (stmt);
bed8b93b 5045 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5046 &def, &dt))
fb85abff 5047 {
6d8fb6cf 5048 if (dump_enabled_p ())
7bd765d4 5049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5050 "use not simple.\n");
fb85abff 5051 return false;
5052 }
5053
94b7b4dd 5054 elem_type = TREE_TYPE (vectype);
fb85abff 5055 vec_mode = TYPE_MODE (vectype);
6960a794 5056
fb85abff 5057 /* FORNOW. In some cases we can vectorize even if the data type is not
 5058 supported (e.g. array initialization with 0). */
d6bf3b14 5059 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
fb85abff 5060 return false;
5061
5062 if (!STMT_VINFO_DATA_REF (stmt_info))
5063 return false;
5064
ae70175f 5065 negative =
5066 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5067 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5068 size_zero_node) < 0;
5069 if (negative && ncopies > 1)
1aa22f27 5070 {
6d8fb6cf 5071 if (dump_enabled_p ())
7bd765d4 5072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13bad991 5073 "multiple types with negative step.\n");
1aa22f27 5074 return false;
5075 }
5076
ae70175f 5077 if (negative)
5078 {
5079 gcc_assert (!grouped_store);
5080 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5081 if (alignment_support_scheme != dr_aligned
5082 && alignment_support_scheme != dr_unaligned_supported)
5083 {
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13bad991 5086 "negative step but alignment required.\n");
ae70175f 5087 return false;
5088 }
13bad991 5089 if (dt != vect_constant_def
5090 && dt != vect_external_def
5091 && !perm_mask_for_reverse (vectype))
ae70175f 5092 {
5093 if (dump_enabled_p ())
5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13bad991 5095 "negative step and reversing not supported.\n");
ae70175f 5096 return false;
5097 }
5098 }
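 /* A negative DR_STEP typically comes from a store loop that walks
    backwards, e.g. (illustrative only):

      for (i = n - 1; i >= 0; i--)
        a[i] = x[i];

    In that case each vector of values is reversed with the permutation
    mask checked above before it is stored. */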
5099
ee612634 5100 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
fb85abff 5101 {
ee612634 5102 grouped_store = true;
21009880 5103 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
481fc474 5104 if (!slp && !PURE_SLP_STMT (stmt_info))
5105 {
21009880 5106 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
94b7b4dd 5107 if (vect_store_lanes_supported (vectype, group_size))
5108 store_lanes_p = true;
ee612634 5109 else if (!vect_grouped_store_supported (vectype, group_size))
481fc474 5110 return false;
5111 }
48e1416a 5112
fb85abff 5113 if (first_stmt == stmt)
5114 {
5115 /* STMT is the leader of the group. Check the operands of all the
5116 stmts of the group. */
21009880 5117 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
fb85abff 5118 while (next_stmt)
5119 {
5120 gcc_assert (gimple_assign_single_p (next_stmt));
5121 op = gimple_assign_rhs1 (next_stmt);
bed8b93b 5122 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5123 &def_stmt, &def, &dt))
fb85abff 5124 {
6d8fb6cf 5125 if (dump_enabled_p ())
7bd765d4 5126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5127 "use not simple.\n");
fb85abff 5128 return false;
5129 }
21009880 5130 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
fb85abff 5131 }
5132 }
5133 }
5134
5135 if (!vec_stmt) /* transformation not required. */
5136 {
5137 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
f97dec81 5138 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5139 NULL, NULL, NULL);
fb85abff 5140 return true;
5141 }
5142
5143 /** Transform. **/
5144
23e1875f 5145 ensure_base_align (stmt_info, dr);
5146
ee612634 5147 if (grouped_store)
fb85abff 5148 {
5149 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
21009880 5150 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
fb85abff 5151
21009880 5152 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
fb85abff 5153
5154 /* FORNOW */
37545e54 5155 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
fb85abff 5156
5157 /* We vectorize all the stmts of the interleaving group when we
5158 reach the last stmt in the group. */
21009880 5159 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5160 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
fb85abff 5161 && !slp)
5162 {
5163 *vec_stmt = NULL;
5164 return true;
5165 }
5166
5167 if (slp)
6ea6a380 5168 {
ee612634 5169 grouped_store = false;
6ea6a380 5170 /* VEC_NUM is the number of vect stmts to be created for this
5171 group. */
5172 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
f1f41a6c 5173 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6ea6a380 5174 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
b0f64919 5175 op = gimple_assign_rhs1 (first_stmt);
6ea6a380 5176 }
fb85abff 5177 else
6ea6a380 5178 /* VEC_NUM is the number of vect stmts to be created for this
5179 group. */
fb85abff 5180 vec_num = group_size;
5181 }
48e1416a 5182 else
fb85abff 5183 {
5184 first_stmt = stmt;
5185 first_dr = dr;
5186 group_size = vec_num = 1;
fb85abff 5187 }
48e1416a 5188
6d8fb6cf 5189 if (dump_enabled_p ())
7bd765d4 5190 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 5191 "transform store. ncopies = %d\n", ncopies);
fb85abff 5192
f1f41a6c 5193 dr_chain.create (group_size);
5194 oprnds.create (group_size);
fb85abff 5195
0822b158 5196 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
fb85abff 5197 gcc_assert (alignment_support_scheme);
94b7b4dd 5198 /* Targets with store-lane instructions must not require explicit
5199 realignment. */
5200 gcc_assert (!store_lanes_p
5201 || alignment_support_scheme == dr_aligned
5202 || alignment_support_scheme == dr_unaligned_supported);
5203
ae70175f 5204 if (negative)
5205 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5206
94b7b4dd 5207 if (store_lanes_p)
5208 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5209 else
5210 aggr_type = vectype;
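 /* E.g. (illustrative): for a group of two V4SI stores on a target with
    store-lane instructions, AGGR_TYPE becomes an 8-element array of
    ints, matching the multi-vector memory operand of a vst2-style
    instruction; without store lanes each copy stores a plain VECTYPE. */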
fb85abff 5211
5212 /* In case the vectorization factor (VF) is bigger than the number
5213 of elements that we can fit in a vectype (nunits), we have to generate
 5214 more than one vector stmt - i.e., we need to "unroll" the
48e1416a 5215 vector stmt by a factor VF/nunits. For more details see documentation in
fb85abff 5216 vect_get_vec_def_for_copy_stmt. */
5217
ee612634 5218 /* In case of interleaving (non-unit grouped access):
fb85abff 5219
5220 S1: &base + 2 = x2
5221 S2: &base = x0
5222 S3: &base + 1 = x1
5223 S4: &base + 3 = x3
5224
5225 We create vectorized stores starting from base address (the access of the
5226 first stmt in the chain (S2 in the above example), when the last store stmt
5227 of the chain (S4) is reached:
5228
5229 VS1: &base = vx2
5230 VS2: &base + vec_size*1 = vx0
5231 VS3: &base + vec_size*2 = vx1
5232 VS4: &base + vec_size*3 = vx3
5233
5234 Then permutation statements are generated:
5235
8bec2124 5236 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5237 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
fb85abff 5238 ...
48e1416a 5239
fb85abff 5240 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5241 (the order of the data-refs in the output of vect_permute_store_chain
5242 corresponds to the order of scalar stmts in the interleaving chain - see
5243 the documentation of vect_permute_store_chain()).
5244
5245 In case of both multiple types and interleaving, above vector stores and
282bf14c 5246 permutation stmts are created for every copy. The result vector stmts are
fb85abff 5247 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
48e1416a 5248 STMT_VINFO_RELATED_STMT for the next copies.
fb85abff 5249 */
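
 /* For instance (an illustrative source loop, not taken from this file),
    the four interleaved stores above would come from something like:

      for (i = 0; i < n; i += 4)
        {
          base[i] = x0;
          base[i + 1] = x1;
          base[i + 2] = x2;
          base[i + 3] = x3;
        }

    and the VEC_PERM_EXPRs interleave the vectorized defs before the
    wide stores are emitted. */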
5250
5251 prev_stmt_info = NULL;
5252 for (j = 0; j < ncopies; j++)
5253 {
5254 gimple new_stmt;
fb85abff 5255
5256 if (j == 0)
5257 {
5258 if (slp)
5259 {
5260 /* Get vectorized arguments for SLP_NODE. */
b0f64919 5261 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5262 NULL, slp_node, -1);
fb85abff 5263
f1f41a6c 5264 vec_oprnd = vec_oprnds[0];
fb85abff 5265 }
5266 else
5267 {
48e1416a 5268 /* For interleaved stores we collect vectorized defs for all the
5269 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5270 used as an input to vect_permute_store_chain(), and OPRNDS as
fb85abff 5271 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5272
ee612634 5273 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
fb85abff 5274 OPRNDS are of size 1. */
48e1416a 5275 next_stmt = first_stmt;
fb85abff 5276 for (i = 0; i < group_size; i++)
5277 {
48e1416a 5278 /* Since gaps are not supported for interleaved stores,
5279 GROUP_SIZE is the exact number of stmts in the chain.
 5280 Therefore, NEXT_STMT can't be NULL. In case there
 5281 is no interleaving, GROUP_SIZE is 1, and only one
fb85abff 5282 iteration of the loop will be executed. */
5283 gcc_assert (next_stmt
5284 && gimple_assign_single_p (next_stmt));
5285 op = gimple_assign_rhs1 (next_stmt);
5286
48e1416a 5287 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
fb85abff 5288 NULL);
f1f41a6c 5289 dr_chain.quick_push (vec_oprnd);
5290 oprnds.quick_push (vec_oprnd);
21009880 5291 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
fb85abff 5292 }
5293 }
5294
 5295 /* We should have caught mismatched types earlier. */
5296 gcc_assert (useless_type_conversion_p (vectype,
5297 TREE_TYPE (vec_oprnd)));
3d483a94 5298 bool simd_lane_access_p
5299 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5300 if (simd_lane_access_p
5301 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5302 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5303 && integer_zerop (DR_OFFSET (first_dr))
5304 && integer_zerop (DR_INIT (first_dr))
5305 && alias_sets_conflict_p (get_alias_set (aggr_type),
5306 get_alias_set (DR_REF (first_dr))))
5307 {
5308 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5309 dataref_offset = build_int_cst (reference_alias_ptr_type
5310 (DR_REF (first_dr)), 0);
8e1a382d 5311 inv_p = false;
3d483a94 5312 }
5313 else
5314 dataref_ptr
5315 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5316 simd_lane_access_p ? loop : NULL,
ae70175f 5317 offset, &dummy, gsi, &ptr_incr,
3d483a94 5318 simd_lane_access_p, &inv_p);
37545e54 5319 gcc_assert (bb_vinfo || !inv_p);
fb85abff 5320 }
48e1416a 5321 else
fb85abff 5322 {
48e1416a 5323 /* For interleaved stores we created vectorized defs for all the
5324 defs stored in OPRNDS in the previous iteration (previous copy).
5325 DR_CHAIN is then used as an input to vect_permute_store_chain(),
fb85abff 5326 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5327 next copy.
ee612634 5328 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
fb85abff 5329 OPRNDS are of size 1. */
5330 for (i = 0; i < group_size; i++)
5331 {
f1f41a6c 5332 op = oprnds[i];
bed8b93b 5333 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5334 &def, &dt);
48e1416a 5335 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
f1f41a6c 5336 dr_chain[i] = vec_oprnd;
5337 oprnds[i] = vec_oprnd;
fb85abff 5338 }
3d483a94 5339 if (dataref_offset)
5340 dataref_offset
5341 = int_const_binop (PLUS_EXPR, dataref_offset,
5342 TYPE_SIZE_UNIT (aggr_type));
5343 else
5344 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5345 TYPE_SIZE_UNIT (aggr_type));
fb85abff 5346 }
5347
94b7b4dd 5348 if (store_lanes_p)
fb85abff 5349 {
94b7b4dd 5350 tree vec_array;
ff99c834 5351
94b7b4dd 5352 /* Combine all the vectors into an array. */
5353 vec_array = create_vector_array (vectype, vec_num);
5354 for (i = 0; i < vec_num; i++)
2572aaf3 5355 {
f1f41a6c 5356 vec_oprnd = dr_chain[i];
94b7b4dd 5357 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
ff99c834 5358 }
48e1416a 5359
94b7b4dd 5360 /* Emit:
5361 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5362 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5363 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5364 gimple_call_set_lhs (new_stmt, data_ref);
ff99c834 5365 vect_finish_stmt_generation (stmt, new_stmt, gsi);
94b7b4dd 5366 }
5367 else
5368 {
5369 new_stmt = NULL;
ee612634 5370 if (grouped_store)
94b7b4dd 5371 {
f40aaf2d 5372 if (j == 0)
5373 result_chain.create (group_size);
94b7b4dd 5374 /* Permute. */
5375 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5376 &result_chain);
5377 }
2572aaf3 5378
94b7b4dd 5379 next_stmt = first_stmt;
5380 for (i = 0; i < vec_num; i++)
5381 {
ceea063b 5382 unsigned align, misalign;
94b7b4dd 5383
5384 if (i > 0)
5385 /* Bump the vector pointer. */
5386 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5387 stmt, NULL_TREE);
5388
5389 if (slp)
f1f41a6c 5390 vec_oprnd = vec_oprnds[i];
ee612634 5391 else if (grouped_store)
5392 /* For grouped stores vectorized defs are interleaved in
94b7b4dd 5393 vect_permute_store_chain(). */
f1f41a6c 5394 vec_oprnd = result_chain[i];
94b7b4dd 5395
5396 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3d483a94 5397 dataref_offset
5398 ? dataref_offset
5399 : build_int_cst (reference_alias_ptr_type
5400 (DR_REF (first_dr)), 0));
ceea063b 5401 align = TYPE_ALIGN_UNIT (vectype);
94b7b4dd 5402 if (aligned_access_p (first_dr))
ceea063b 5403 misalign = 0;
94b7b4dd 5404 else if (DR_MISALIGNMENT (first_dr) == -1)
5405 {
5406 TREE_TYPE (data_ref)
5407 = build_aligned_type (TREE_TYPE (data_ref),
5408 TYPE_ALIGN (elem_type));
ceea063b 5409 align = TYPE_ALIGN_UNIT (elem_type);
5410 misalign = 0;
94b7b4dd 5411 }
5412 else
5413 {
5414 TREE_TYPE (data_ref)
5415 = build_aligned_type (TREE_TYPE (data_ref),
5416 TYPE_ALIGN (elem_type));
ceea063b 5417 misalign = DR_MISALIGNMENT (first_dr);
94b7b4dd 5418 }
3d483a94 5419 if (dataref_offset == NULL_TREE)
5420 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5421 misalign);
2572aaf3 5422
13bad991 5423 if (negative
5424 && dt != vect_constant_def
5425 && dt != vect_external_def)
ae70175f 5426 {
5427 tree perm_mask = perm_mask_for_reverse (vectype);
5428 tree perm_dest
5429 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5430 vectype);
5431 tree new_temp = make_ssa_name (perm_dest, NULL);
5432
5433 /* Generate the permute statement. */
5434 gimple perm_stmt
5435 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5436 vec_oprnd, vec_oprnd,
5437 perm_mask);
5438 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5439
5440 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5441 vec_oprnd = new_temp;
5442 }
5443
94b7b4dd 5444 /* Arguments are ready. Create the new vector stmt. */
5445 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
94b7b4dd 5447
5448 if (slp)
5449 continue;
5450
21009880 5451 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
94b7b4dd 5452 if (!next_stmt)
5453 break;
5454 }
fb85abff 5455 }
63fd99db 5456 if (!slp)
5457 {
5458 if (j == 0)
5459 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5460 else
5461 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5462 prev_stmt_info = vinfo_for_stmt (new_stmt);
5463 }
fb85abff 5464 }
5465
f1f41a6c 5466 dr_chain.release ();
5467 oprnds.release ();
5468 result_chain.release ();
5469 vec_oprnds.release ();
fb85abff 5470
5471 return true;
5472}
5473
16dfb112 5474/* Given a vector type VECTYPE and a permutation SEL, returns
5475 the VECTOR_CST mask that implements the permutation of the
5476 vector elements. If that is impossible to do, returns NULL. */
1aa22f27 5477
8bec2124 5478tree
5479vect_gen_perm_mask (tree vectype, unsigned char *sel)
1aa22f27 5480{
fadf62f4 5481 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
964ec9cd 5482 int i, nunits;
1aa22f27 5483
e21c468f 5484 nunits = TYPE_VECTOR_SUBPARTS (vectype);
e21c468f 5485
5486 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1aa22f27 5487 return NULL;
5488
3cea8318 5489 mask_elt_type = lang_hooks.types.type_for_mode
5490 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
e21c468f 5491 mask_type = get_vectype_for_scalar_type (mask_elt_type);
1aa22f27 5492
fadf62f4 5493 mask_elts = XALLOCAVEC (tree, nunits);
16dfb112 5494 for (i = nunits - 1; i >= 0; i--)
fadf62f4 5495 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5496 mask_vec = build_vector (mask_type, mask_elts);
1aa22f27 5497
964ec9cd 5498 return mask_vec;
1aa22f27 5499}
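
/* Example use (a sketch, not code from this file): selecting the even
   elements of the concatenation of two 8-element vectors:

     unsigned char sel[8] = { 0, 2, 4, 6, 8, 10, 12, 14 };
     tree mask = vect_gen_perm_mask (vectype, sel);
     if (mask == NULL_TREE)
       return false;   (the target cannot do this permutation)

   SEL indexes the concatenation of the two inputs of the eventual
   VEC_PERM_EXPR, so its entries range over [0, 2 * nunits). */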
5500
16dfb112 5501/* Given vector variables X and Y that were generated for the scalar
5502 STMT, generate instructions to permute the vector elements of X and Y
5503 using permutation mask MASK_VEC, insert them at *GSI and return the
5504 permuted vector variable. */
1aa22f27 5505
5506static tree
16dfb112 5507permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5508 gimple_stmt_iterator *gsi)
1aa22f27 5509{
5510 tree vectype = TREE_TYPE (x);
16dfb112 5511 tree perm_dest, data_ref;
1aa22f27 5512 gimple perm_stmt;
5513
dbe41d8c 5514 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
16dfb112 5515 data_ref = make_ssa_name (perm_dest, NULL);
1aa22f27 5516
5517 /* Generate the permute statement. */
446e85eb 5518 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5519 x, y, mask_vec);
1aa22f27 5520 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5521
5522 return data_ref;
5523}
5524
500fffe1 5525/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
 5526 inserting them on the loop's preheader edge. Returns true if we
 5527 were successful in doing so (and thus STMT can then be moved),
5528 otherwise returns false. */
5529
5530static bool
5531hoist_defs_of_uses (gimple stmt, struct loop *loop)
5532{
5533 ssa_op_iter i;
5534 tree op;
5535 bool any = false;
5536
5537 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5538 {
5539 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5540 if (!gimple_nop_p (def_stmt)
5541 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5542 {
5543 /* Make sure we don't need to recurse. While we could do
 5544 so in simple cases, for more complex use webs
5545 we don't have an easy way to preserve stmt order to fulfil
5546 dependencies within them. */
5547 tree op2;
5548 ssa_op_iter i2;
c7a8722c 5549 if (gimple_code (def_stmt) == GIMPLE_PHI)
5550 return false;
500fffe1 5551 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5552 {
5553 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5554 if (!gimple_nop_p (def_stmt2)
5555 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5556 return false;
5557 }
5558 any = true;
5559 }
5560 }
5561
5562 if (!any)
5563 return true;
5564
5565 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5566 {
5567 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5568 if (!gimple_nop_p (def_stmt)
5569 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5570 {
5571 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5572 gsi_remove (&gsi, false);
5573 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5574 }
5575 }
5576
5577 return true;
5578}
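
/* A hypothetical example: if STMT is the invariant load "x_3 = *p_2"
   and the loop body contains "p_2 = a_1 + 4" where a_1 is defined
   outside the loop, the definition of p_2 is moved to the preheader
   and true is returned, so the load itself may be hoisted as well.
   If p_2 were defined by a PHI node or by another statement with
   in-loop dependences, the function would return false instead. */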
5579
fb85abff 5580/* vectorizable_load.
5581
48e1416a 5582 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5583 can be vectorized.
5584 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
fb85abff 5585 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5586 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5587
5588static bool
5589vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
23e1875f 5590 slp_tree slp_node, slp_instance slp_node_instance)
fb85abff 5591{
5592 tree scalar_dest;
5593 tree vec_dest = NULL;
5594 tree data_ref = NULL;
5595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
48e1416a 5596 stmt_vec_info prev_stmt_info;
fb85abff 5597 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 5598 struct loop *loop = NULL;
fb85abff 5599 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
37545e54 5600 bool nested_in_vect_loop = false;
23e1875f 5601 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
fb85abff 5602 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
94b7b4dd 5603 tree elem_type;
fb85abff 5604 tree new_temp;
3754d046 5605 machine_mode mode;
fb85abff 5606 gimple new_stmt = NULL;
5607 tree dummy;
5608 enum dr_alignment_support alignment_support_scheme;
5609 tree dataref_ptr = NULL_TREE;
3d483a94 5610 tree dataref_offset = NULL_TREE;
5c90e7b6 5611 gimple ptr_incr = NULL;
fb85abff 5612 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5613 int ncopies;
8f3e4987 5614 int i, j, group_size, group_gap;
fb85abff 5615 tree msq = NULL_TREE, lsq;
5616 tree offset = NULL_TREE;
1ec61bbd 5617 tree byte_offset = NULL_TREE;
fb85abff 5618 tree realignment_token = NULL_TREE;
5619 gimple phi = NULL;
1e094109 5620 vec<tree> dr_chain = vNULL;
ee612634 5621 bool grouped_load = false;
94b7b4dd 5622 bool load_lanes_p = false;
fb85abff 5623 gimple first_stmt;
fb85abff 5624 bool inv_p;
f634c3e9 5625 bool negative = false;
fb85abff 5626 bool compute_in_loop = false;
5627 struct loop *at_loop;
5628 int vec_num;
5629 bool slp = (slp_node != NULL);
5630 bool slp_perm = false;
5631 enum tree_code code;
37545e54 5632 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5633 int vf;
94b7b4dd 5634 tree aggr_type;
16dfb112 5635 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5636 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5637 int gather_scale = 1;
5638 enum vect_def_type gather_dt = vect_unknown_def_type;
37545e54 5639
5640 if (loop_vinfo)
5641 {
5642 loop = LOOP_VINFO_LOOP (loop_vinfo);
5643 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5644 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5645 }
5646 else
c994a226 5647 vf = 1;
fb85abff 5648
5649 /* Multiple types in SLP are handled by creating the appropriate number of
282bf14c 5650 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
fb85abff 5651 case of SLP. */
bc937a44 5652 if (slp || PURE_SLP_STMT (stmt_info))
fb85abff 5653 ncopies = 1;
5654 else
5655 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5656
5657 gcc_assert (ncopies >= 1);
5658
5659 /* FORNOW. This restriction should be relaxed. */
5660 if (nested_in_vect_loop && ncopies > 1)
5661 {
6d8fb6cf 5662 if (dump_enabled_p ())
7bd765d4 5663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5664 "multiple types in nested loop.\n");
fb85abff 5665 return false;
5666 }
5667
a8cf7702 5668 /* Invalidate assumptions made by dependence analysis when vectorization
5669 on the unrolled body effectively re-orders stmts. */
5670 if (ncopies > 1
5671 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5672 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5673 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5674 {
5675 if (dump_enabled_p ())
5676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5677 "cannot perform implicit CSE when unrolling "
5678 "with negative dependence distance\n");
5679 return false;
5680 }
5681
37545e54 5682 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
fb85abff 5683 return false;
5684
f083cd24 5685 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
fb85abff 5686 return false;
5687
5688 /* Is vectorizable load? */
5689 if (!is_gimple_assign (stmt))
5690 return false;
5691
5692 scalar_dest = gimple_assign_lhs (stmt);
5693 if (TREE_CODE (scalar_dest) != SSA_NAME)
5694 return false;
5695
5696 code = gimple_assign_rhs_code (stmt);
5697 if (code != ARRAY_REF
19bacd59 5698 && code != BIT_FIELD_REF
fb85abff 5699 && code != INDIRECT_REF
10095225 5700 && code != COMPONENT_REF
5701 && code != IMAGPART_EXPR
182cf5a9 5702 && code != REALPART_EXPR
b5ba3316 5703 && code != MEM_REF
5704 && TREE_CODE_CLASS (code) != tcc_declaration)
fb85abff 5705 return false;
5706
5707 if (!STMT_VINFO_DATA_REF (stmt_info))
5708 return false;
5709
6960a794 5710 elem_type = TREE_TYPE (vectype);
d6bf3b14 5711 mode = TYPE_MODE (vectype);
fb85abff 5712
 5713 /* FORNOW. In some cases we can vectorize even if the data type is not
 5714 supported (e.g. data copies). */
d6bf3b14 5715 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
fb85abff 5716 {
6d8fb6cf 5717 if (dump_enabled_p ())
7bd765d4 5718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5719 "Aligned load, but unsupported type.\n");
fb85abff 5720 return false;
5721 }
5722
fb85abff 5723 /* Check if the load is a part of an interleaving chain. */
ee612634 5724 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
fb85abff 5725 {
ee612634 5726 grouped_load = true;
fb85abff 5727 /* FORNOW */
16dfb112 5728 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
fb85abff 5729
21009880 5730 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
481fc474 5731 if (!slp && !PURE_SLP_STMT (stmt_info))
5732 {
21009880 5733 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
94b7b4dd 5734 if (vect_load_lanes_supported (vectype, group_size))
5735 load_lanes_p = true;
ee612634 5736 else if (!vect_grouped_load_supported (vectype, group_size))
481fc474 5737 return false;
5738 }
a8cf7702 5739
5740 /* Invalidate assumptions made by dependence analysis when vectorization
5741 on the unrolled body effectively re-orders stmts. */
5742 if (!PURE_SLP_STMT (stmt_info)
5743 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5744 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5745 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5746 {
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5749 "cannot perform implicit CSE when performing "
5750 "group loads with negative dependence distance\n");
5751 return false;
5752 }
fb85abff 5753 }
5754
1aa22f27 5755
16dfb112 5756 if (STMT_VINFO_GATHER_P (stmt_info))
5757 {
5758 gimple def_stmt;
5759 tree def;
5760 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5761 &gather_off, &gather_scale);
5762 gcc_assert (gather_decl);
bed8b93b 5763 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
16dfb112 5764 &def_stmt, &def, &gather_dt,
5765 &gather_off_vectype))
5766 {
6d8fb6cf 5767 if (dump_enabled_p ())
7bd765d4 5768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5769 "gather index use not simple.\n");
16dfb112 5770 return false;
5771 }
5772 }
7a10f06b 5773 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
bc17236a 5774 ;
f634c3e9 5775 else
5776 {
5777 negative = tree_int_cst_compare (nested_in_vect_loop
5778 ? STMT_VINFO_DR_STEP (stmt_info)
5779 : DR_STEP (dr),
5780 size_zero_node) < 0;
5781 if (negative && ncopies > 1)
5782 {
6d8fb6cf 5783 if (dump_enabled_p ())
7bd765d4 5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5785 "multiple types with negative step.\n");
f634c3e9 5786 return false;
5787 }
5788
5789 if (negative)
5790 {
8bbe6b75 5791 if (grouped_load)
5792 {
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5795 "negative step for group load not supported"
5796 "\n");
8bbe6b75 5797 return false;
5798 }
f634c3e9 5799 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5800 if (alignment_support_scheme != dr_aligned
5801 && alignment_support_scheme != dr_unaligned_supported)
5802 {
6d8fb6cf 5803 if (dump_enabled_p ())
7bd765d4 5804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5805 "negative step but alignment required.\n");
f634c3e9 5806 return false;
5807 }
5808 if (!perm_mask_for_reverse (vectype))
5809 {
6d8fb6cf 5810 if (dump_enabled_p ())
7bd765d4 5811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 5812 "negative step and reversing not supported."
5813 "\n");
f634c3e9 5814 return false;
5815 }
5816 }
7a10f06b 5817 }
16dfb112 5818
fb85abff 5819 if (!vec_stmt) /* transformation not required. */
5820 {
5821 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
f97dec81 5822 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
fb85abff 5823 return true;
5824 }
5825
6d8fb6cf 5826 if (dump_enabled_p ())
7bd765d4 5827 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 5828 "transform load. ncopies = %d\n", ncopies);
fb85abff 5829
5830 /** Transform. **/
5831
23e1875f 5832 ensure_base_align (stmt_info, dr);
5833
16dfb112 5834 if (STMT_VINFO_GATHER_P (stmt_info))
5835 {
5836 tree vec_oprnd0 = NULL_TREE, op;
5837 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5838 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1706116d 5839 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
16dfb112 5840 edge pe = loop_preheader_edge (loop);
5841 gimple_seq seq;
5842 basic_block new_bb;
5843 enum { NARROW, NONE, WIDEN } modifier;
5844 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5845
5846 if (nunits == gather_off_nunits)
5847 modifier = NONE;
5848 else if (nunits == gather_off_nunits / 2)
5849 {
5850 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5851 modifier = WIDEN;
5852
5853 for (i = 0; i < gather_off_nunits; ++i)
5854 sel[i] = i | nunits;
5855
8bec2124 5856 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
16dfb112 5857 gcc_assert (perm_mask != NULL_TREE);
5858 }
5859 else if (nunits == gather_off_nunits * 2)
5860 {
5861 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5862 modifier = NARROW;
5863
5864 for (i = 0; i < nunits; ++i)
5865 sel[i] = i < gather_off_nunits
5866 ? i : i + nunits - gather_off_nunits;
5867
8bec2124 5868 perm_mask = vect_gen_perm_mask (vectype, sel);
16dfb112 5869 gcc_assert (perm_mask != NULL_TREE);
5870 ncopies *= 2;
5871 }
5872 else
5873 gcc_unreachable ();
5874
5875 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5876 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5877 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5878 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5879 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5880 scaletype = TREE_VALUE (arglist);
1706116d 5881 gcc_checking_assert (types_compatible_p (srctype, rettype));
16dfb112 5882
5883 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5884
5885 ptr = fold_convert (ptrtype, gather_base);
5886 if (!is_gimple_min_invariant (ptr))
5887 {
5888 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5889 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5890 gcc_assert (!new_bb);
5891 }
5892
5893 /* Currently we support only unconditional gather loads,
5894 so mask should be all ones. */
1706116d 5895 if (TREE_CODE (masktype) == INTEGER_TYPE)
5896 mask = build_int_cst (masktype, -1);
5897 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5898 {
5899 mask = build_int_cst (TREE_TYPE (masktype), -1);
5900 mask = build_vector_from_val (masktype, mask);
d7bcf3d1 5901 mask = vect_init_vector (stmt, mask, masktype, NULL);
1706116d 5902 }
16dfb112 5903 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5904 {
5905 REAL_VALUE_TYPE r;
5906 long tmp[6];
5907 for (j = 0; j < 6; ++j)
5908 tmp[j] = -1;
5909 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5910 mask = build_real (TREE_TYPE (masktype), r);
1706116d 5911 mask = build_vector_from_val (masktype, mask);
d7bcf3d1 5912 mask = vect_init_vector (stmt, mask, masktype, NULL);
16dfb112 5913 }
5914 else
5915 gcc_unreachable ();
16dfb112 5916
5917 scale = build_int_cst (scaletype, gather_scale);
5918
1706116d 5919 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5920 merge = build_int_cst (TREE_TYPE (rettype), 0);
5921 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5922 {
5923 REAL_VALUE_TYPE r;
5924 long tmp[6];
5925 for (j = 0; j < 6; ++j)
5926 tmp[j] = 0;
5927 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5928 merge = build_real (TREE_TYPE (rettype), r);
5929 }
5930 else
5931 gcc_unreachable ();
5932 merge = build_vector_from_val (rettype, merge);
5933 merge = vect_init_vector (stmt, merge, rettype, NULL);
5934
16dfb112 5935 prev_stmt_info = NULL;
5936 for (j = 0; j < ncopies; ++j)
5937 {
5938 if (modifier == WIDEN && (j & 1))
5939 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5940 perm_mask, stmt, gsi);
5941 else if (j == 0)
5942 op = vec_oprnd0
5943 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5944 else
5945 op = vec_oprnd0
5946 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5947
5948 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5949 {
5950 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5951 == TYPE_VECTOR_SUBPARTS (idxtype));
5952 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
16dfb112 5953 var = make_ssa_name (var, NULL);
5954 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5955 new_stmt
5956 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5957 op, NULL_TREE);
5958 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5959 op = var;
5960 }
5961
5962 new_stmt
1706116d 5963 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
16dfb112 5964
5965 if (!useless_type_conversion_p (vectype, rettype))
5966 {
5967 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5968 == TYPE_VECTOR_SUBPARTS (rettype));
5969 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
16dfb112 5970 op = make_ssa_name (var, new_stmt);
5971 gimple_call_set_lhs (new_stmt, op);
5972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5973 var = make_ssa_name (vec_dest, NULL);
5974 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5975 new_stmt
5976 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5977 NULL_TREE);
5978 }
5979 else
5980 {
5981 var = make_ssa_name (vec_dest, new_stmt);
5982 gimple_call_set_lhs (new_stmt, var);
5983 }
5984
5985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5986
5987 if (modifier == NARROW)
5988 {
5989 if ((j & 1) == 0)
5990 {
5991 prev_res = var;
5992 continue;
5993 }
5994 var = permute_vec_elements (prev_res, var,
5995 perm_mask, stmt, gsi);
5996 new_stmt = SSA_NAME_DEF_STMT (var);
5997 }
5998
5999 if (prev_stmt_info == NULL)
6000 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6001 else
6002 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6003 prev_stmt_info = vinfo_for_stmt (new_stmt);
6004 }
6005 return true;
6006 }
7a10f06b 6007 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6008 {
6009 gimple_stmt_iterator incr_gsi;
6010 bool insert_after;
6011 gimple incr;
6012 tree offvar;
7a10f06b 6013 tree ivstep;
6014 tree running_off;
f1f41a6c 6015 vec<constructor_elt, va_gc> *v = NULL;
7a10f06b 6016 gimple_seq stmts = NULL;
bc17236a 6017 tree stride_base, stride_step, alias_off;
6018
6019 gcc_assert (!nested_in_vect_loop);
7a10f06b 6020
bc17236a 6021 stride_base
6022 = fold_build_pointer_plus
6023 (unshare_expr (DR_BASE_ADDRESS (dr)),
6024 size_binop (PLUS_EXPR,
6025 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
9af5ce0c 6026 convert_to_ptrofftype (DR_INIT (dr))));
bc17236a 6027 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
7a10f06b 6028
6029 /* For a load with loop-invariant (but other than power-of-2)
6030 stride (i.e. not a grouped access) like so:
6031
6032 for (i = 0; i < n; i += stride)
6033 ... = array[i];
6034
6035 we generate a new induction variable and new accesses to
6036 form a new vector (or vectors, depending on ncopies):
6037
6038 for (j = 0; ; j += VF*stride)
6039 tmp1 = array[j];
6040 tmp2 = array[j + stride];
6041 ...
6042 vectemp = {tmp1, tmp2, ...}
6043 */
6044
6045 ivstep = stride_step;
6046 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6047 build_int_cst (TREE_TYPE (ivstep), vf));
6048
6049 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6050
6051 create_iv (stride_base, ivstep, NULL,
6052 loop, &incr_gsi, insert_after,
6053 &offvar, NULL);
6054 incr = gsi_stmt (incr_gsi);
6055 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6056
6057 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6058 if (stmts)
6059 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6060
6061 prev_stmt_info = NULL;
6062 running_off = offvar;
bc17236a 6063 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
7a10f06b 6064 for (j = 0; j < ncopies; j++)
6065 {
6066 tree vec_inv;
6067
f1f41a6c 6068 vec_alloc (v, nunits);
7a10f06b 6069 for (i = 0; i < nunits; i++)
6070 {
6071 tree newref, newoff;
6072 gimple incr;
bc17236a 6073 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6074 running_off, alias_off);
7a10f06b 6075
6076 newref = force_gimple_operand_gsi (gsi, newref, true,
6077 NULL_TREE, true,
6078 GSI_SAME_STMT);
6079 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
874117c8 6080 newoff = copy_ssa_name (running_off, NULL);
bc17236a 6081 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6082 running_off, stride_step);
7a10f06b 6083 vect_finish_stmt_generation (stmt, incr, gsi);
6084
6085 running_off = newoff;
6086 }
6087
6088 vec_inv = build_constructor (vectype, v);
6089 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6090 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7a10f06b 6091
6092 if (j == 0)
6093 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6094 else
6095 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6096 prev_stmt_info = vinfo_for_stmt (new_stmt);
6097 }
6098 return true;
6099 }
16dfb112 6100
ee612634 6101 if (grouped_load)
fb85abff 6102 {
21009880 6103 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
24c271ef 6104 if (slp
678e3d6e 6105 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
f1f41a6c 6106 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6107 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
24c271ef 6108
fb85abff 6109 /* Check if the chain of loads is already vectorized. */
678e3d6e 6110 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6111 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6112 ??? But we can only do so if there is exactly one
6113 as we have no way to get at the rest. Leave the CSE
6114 opportunity alone.
6115 ??? With the group load eventually participating
6116 in multiple different permutations (having multiple
6117 slp nodes which refer to the same group) the CSE
6118 is even wrong code. See PR56270. */
6119 && !slp)
fb85abff 6120 {
6121 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6122 return true;
6123 }
6124 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
21009880 6125 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
fb85abff 6126
6127 /* VEC_NUM is the number of vect stmts to be created for this group. */
6128 if (slp)
6129 {
ee612634 6130 grouped_load = false;
fb85abff 6131 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
678e3d6e 6132 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
37545e54 6133 slp_perm = true;
8f3e4987 6134 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
37545e54 6135 }
fb85abff 6136 else
8f3e4987 6137 {
6138 vec_num = group_size;
6139 group_gap = 0;
6140 }
fb85abff 6141 }
6142 else
6143 {
6144 first_stmt = stmt;
6145 first_dr = dr;
6146 group_size = vec_num = 1;
8f3e4987 6147 group_gap = 0;
fb85abff 6148 }
6149
0822b158 6150 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
fb85abff 6151 gcc_assert (alignment_support_scheme);
94b7b4dd 6152 /* Targets with load-lane instructions must not require explicit
6153 realignment. */
6154 gcc_assert (!load_lanes_p
6155 || alignment_support_scheme == dr_aligned
6156 || alignment_support_scheme == dr_unaligned_supported);
fb85abff 6157
6158 /* In case the vectorization factor (VF) is bigger than the number
6159 of elements that we can fit in a vectype (nunits), we have to generate
 6160 more than one vector stmt - i.e., we need to "unroll" the
282bf14c 6161 vector stmt by a factor VF/nunits. In doing so, we record a pointer
fb85abff 6162 from one copy of the vector stmt to the next, in the field
282bf14c 6163 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
fb85abff 6164 stages to find the correct vector defs to be used when vectorizing
282bf14c 6165 stmts that use the defs of the current stmt. The example below
6166 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6167 need to create 4 vectorized stmts):
fb85abff 6168
6169 before vectorization:
6170 RELATED_STMT VEC_STMT
6171 S1: x = memref - -
6172 S2: z = x + 1 - -
6173
6174 step 1: vectorize stmt S1:
6175 We first create the vector stmt VS1_0, and, as usual, record a
6176 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6177 Next, we create the vector stmt VS1_1, and record a pointer to
6178 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
282bf14c 6179 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
fb85abff 6180 stmts and pointers:
6181 RELATED_STMT VEC_STMT
6182 VS1_0: vx0 = memref0 VS1_1 -
6183 VS1_1: vx1 = memref1 VS1_2 -
6184 VS1_2: vx2 = memref2 VS1_3 -
6185 VS1_3: vx3 = memref3 - -
6186 S1: x = load - VS1_0
6187 S2: z = x + 1 - -
6188
48e1416a 6189 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6190 information we recorded in RELATED_STMT field is used to vectorize
fb85abff 6191 stmt S2. */
6192
ee612634 6193 /* In case of interleaving (non-unit grouped access):
fb85abff 6194
6195 S1: x2 = &base + 2
6196 S2: x0 = &base
6197 S3: x1 = &base + 1
6198 S4: x3 = &base + 3
6199
48e1416a 6200 Vectorized loads are created in the order of memory accesses
fb85abff 6201 starting from the access of the first stmt of the chain:
6202
6203 VS1: vx0 = &base
6204 VS2: vx1 = &base + vec_size*1
6205 VS3: vx3 = &base + vec_size*2
6206 VS4: vx4 = &base + vec_size*3
6207
6208 Then permutation statements are generated:
6209
42f6a6e8 6210 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6211 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
fb85abff 6212 ...
6213
6214 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6215 (the order of the data-refs in the output of vect_permute_load_chain
6216 corresponds to the order of scalar stmts in the interleaving chain - see
6217 the documentation of vect_permute_load_chain()).
6218 The generation of permutation stmts and recording them in
ee612634 6219 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
fb85abff 6220
48e1416a 6221 In case of both multiple types and interleaving, the vector loads and
282bf14c 6222 permutation stmts above are created for every copy. The result vector
6223 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6224 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
fb85abff 6225
6226 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6227 on a target that supports unaligned accesses (dr_unaligned_supported)
6228 we generate the following code:
6229 p = initial_addr;
6230 indx = 0;
6231 loop {
6232 p = p + indx * vectype_size;
6233 vec_dest = *(p);
6234 indx = indx + 1;
6235 }
6236
6237 Otherwise, the data reference is potentially unaligned on a target that
48e1416a 6238 does not support unaligned accesses (dr_explicit_realign_optimized) -
fb85abff 6239 then generate the following code, in which the data in each iteration is
6240 obtained by two vector loads, one from the previous iteration, and one
6241 from the current iteration:
6242 p1 = initial_addr;
6243 msq_init = *(floor(p1))
6244 p2 = initial_addr + VS - 1;
6245 realignment_token = call target_builtin;
6246 indx = 0;
6247 loop {
6248 p2 = p2 + indx * vectype_size
6249 lsq = *(floor(p2))
6250 vec_dest = realign_load (msq, lsq, realignment_token)
6251 indx = indx + 1;
6252 msq = lsq;
6253 } */
6254
6255 /* If the misalignment remains the same throughout the execution of the
6256 loop, we can create the init_addr and permutation mask at the loop
282bf14c 6257 preheader. Otherwise, they need to be created inside the loop.
fb85abff 6258 This can only occur when vectorizing memory accesses in the inner-loop
6259 nested within an outer-loop that is being vectorized. */
6260
7b0821ca 6261 if (nested_in_vect_loop
31a56b87 6262 && (TREE_INT_CST_LOW (DR_STEP (dr))
fb85abff 6263 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6264 {
6265 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6266 compute_in_loop = true;
6267 }
6268
6269 if ((alignment_support_scheme == dr_explicit_realign_optimized
6270 || alignment_support_scheme == dr_explicit_realign)
6f75c845 6271 && !compute_in_loop)
fb85abff 6272 {
6273 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6274 alignment_support_scheme, NULL_TREE,
6275 &at_loop);
6276 if (alignment_support_scheme == dr_explicit_realign_optimized)
6277 {
6278 phi = SSA_NAME_DEF_STMT (msq);
1ec61bbd 6279 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6280 size_one_node);
fb85abff 6281 }
6282 }
6283 else
6284 at_loop = loop;
6285
1aa22f27 6286 if (negative)
6287 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6288
94b7b4dd 6289 if (load_lanes_p)
6290 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6291 else
6292 aggr_type = vectype;
6293
fb85abff 6294 prev_stmt_info = NULL;
6295 for (j = 0; j < ncopies; j++)
48e1416a 6296 {
94b7b4dd 6297 /* 1. Create the vector or array pointer update chain. */
fb85abff 6298 if (j == 0)
3d483a94 6299 {
6300 bool simd_lane_access_p
6301 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6302 if (simd_lane_access_p
6303 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6304 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6305 && integer_zerop (DR_OFFSET (first_dr))
6306 && integer_zerop (DR_INIT (first_dr))
6307 && alias_sets_conflict_p (get_alias_set (aggr_type),
6308 get_alias_set (DR_REF (first_dr)))
6309 && (alignment_support_scheme == dr_aligned
6310 || alignment_support_scheme == dr_unaligned_supported))
6311 {
6312 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6313 dataref_offset = build_int_cst (reference_alias_ptr_type
6314 (DR_REF (first_dr)), 0);
8e1a382d 6315 inv_p = false;
3d483a94 6316 }
6317 else
6318 dataref_ptr
6319 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6320 offset, &dummy, gsi, &ptr_incr,
1ec61bbd 6321 simd_lane_access_p, &inv_p,
6322 byte_offset);
3d483a94 6323 }
6324 else if (dataref_offset)
6325 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6326 TYPE_SIZE_UNIT (aggr_type));
fb85abff 6327 else
94b7b4dd 6328 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6329 TYPE_SIZE_UNIT (aggr_type));
fb85abff 6330
ee612634 6331 if (grouped_load || slp_perm)
f1f41a6c 6332 dr_chain.create (vec_num);
362eeee7 6333
94b7b4dd 6334 if (load_lanes_p)
fb85abff 6335 {
94b7b4dd 6336 tree vec_array;
6337
6338 vec_array = create_vector_array (vectype, vec_num);
6339
6340 /* Emit:
6341 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6342 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6343 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6344 gimple_call_set_lhs (new_stmt, vec_array);
6345 vect_finish_stmt_generation (stmt, new_stmt, gsi);
fb85abff 6346
94b7b4dd 6347 /* Extract each vector into an SSA_NAME. */
6348 for (i = 0; i < vec_num; i++)
fb85abff 6349 {
94b7b4dd 6350 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6351 vec_array, i);
f1f41a6c 6352 dr_chain.quick_push (new_temp);
94b7b4dd 6353 }
6354
6355 /* Record the mapping between SSA_NAMEs and statements. */
ee612634 6356 vect_record_grouped_load_vectors (stmt, dr_chain);
94b7b4dd 6357 }
6358 else
6359 {
6360 for (i = 0; i < vec_num; i++)
6361 {
6362 if (i > 0)
6363 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6364 stmt, NULL_TREE);
6365
6366 /* 2. Create the vector-load in the loop. */
6367 switch (alignment_support_scheme)
6368 {
6369 case dr_aligned:
6370 case dr_unaligned_supported:
5d9de213 6371 {
ceea063b 6372 unsigned int align, misalign;
6373
94b7b4dd 6374 data_ref
6375 = build2 (MEM_REF, vectype, dataref_ptr,
3d483a94 6376 dataref_offset
6377 ? dataref_offset
6378 : build_int_cst (reference_alias_ptr_type
6379 (DR_REF (first_dr)), 0));
ceea063b 6380 align = TYPE_ALIGN_UNIT (vectype);
94b7b4dd 6381 if (alignment_support_scheme == dr_aligned)
6382 {
6383 gcc_assert (aligned_access_p (first_dr));
ceea063b 6384 misalign = 0;
94b7b4dd 6385 }
6386 else if (DR_MISALIGNMENT (first_dr) == -1)
6387 {
6388 TREE_TYPE (data_ref)
6389 = build_aligned_type (TREE_TYPE (data_ref),
6390 TYPE_ALIGN (elem_type));
ceea063b 6391 align = TYPE_ALIGN_UNIT (elem_type);
6392 misalign = 0;
94b7b4dd 6393 }
6394 else
6395 {
6396 TREE_TYPE (data_ref)
6397 = build_aligned_type (TREE_TYPE (data_ref),
6398 TYPE_ALIGN (elem_type));
ceea063b 6399 misalign = DR_MISALIGNMENT (first_dr);
94b7b4dd 6400 }
3d483a94 6401 if (dataref_offset == NULL_TREE)
6402 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6403 align, misalign);
94b7b4dd 6404 break;
5d9de213 6405 }
94b7b4dd 6406 case dr_explicit_realign:
ff99c834 6407 {
94b7b4dd 6408 tree ptr, bump;
6409 tree vs_minus_1;
6410
6411 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6412
6413 if (compute_in_loop)
6414 msq = vect_setup_realignment (first_stmt, gsi,
6415 &realignment_token,
6416 dr_explicit_realign,
6417 dataref_ptr, NULL);
6418
874117c8 6419 ptr = copy_ssa_name (dataref_ptr, NULL);
94b7b4dd 6420 new_stmt = gimple_build_assign_with_ops
874117c8 6421 (BIT_AND_EXPR, ptr, dataref_ptr,
94b7b4dd 6422 build_int_cst
6423 (TREE_TYPE (dataref_ptr),
6424 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
94b7b4dd 6425 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6426 data_ref
6427 = build2 (MEM_REF, vectype, ptr,
6428 build_int_cst (reference_alias_ptr_type
6429 (DR_REF (first_dr)), 0));
6430 vec_dest = vect_create_destination_var (scalar_dest,
6431 vectype);
6432 new_stmt = gimple_build_assign (vec_dest, data_ref);
6433 new_temp = make_ssa_name (vec_dest, new_stmt);
6434 gimple_assign_set_lhs (new_stmt, new_temp);
6435 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6436 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6438 msq = new_temp;
6439
6440 bump = size_binop (MULT_EXPR, vs_minus_1,
6960a794 6441 TYPE_SIZE_UNIT (elem_type));
94b7b4dd 6442 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6443 new_stmt = gimple_build_assign_with_ops
6444 (BIT_AND_EXPR, NULL_TREE, ptr,
6445 build_int_cst
6446 (TREE_TYPE (ptr),
6447 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
874117c8 6448 ptr = copy_ssa_name (dataref_ptr, new_stmt);
94b7b4dd 6449 gimple_assign_set_lhs (new_stmt, ptr);
6450 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6451 data_ref
6452 = build2 (MEM_REF, vectype, ptr,
6453 build_int_cst (reference_alias_ptr_type
6454 (DR_REF (first_dr)), 0));
6455 break;
ff99c834 6456 }
94b7b4dd 6457 case dr_explicit_realign_optimized:
874117c8 6458 new_temp = copy_ssa_name (dataref_ptr, NULL);
94b7b4dd 6459 new_stmt = gimple_build_assign_with_ops
874117c8 6460 (BIT_AND_EXPR, new_temp, dataref_ptr,
94b7b4dd 6461 build_int_cst
6462 (TREE_TYPE (dataref_ptr),
6463 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
94b7b4dd 6464 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6465 data_ref
6466 = build2 (MEM_REF, vectype, new_temp,
6467 build_int_cst (reference_alias_ptr_type
6468 (DR_REF (first_dr)), 0));
6469 break;
6470 default:
6471 gcc_unreachable ();
6472 }
fb85abff 6473 vec_dest = vect_create_destination_var (scalar_dest, vectype);
94b7b4dd 6474 new_stmt = gimple_build_assign (vec_dest, data_ref);
fb85abff 6475 new_temp = make_ssa_name (vec_dest, new_stmt);
6476 gimple_assign_set_lhs (new_stmt, new_temp);
6477 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6478
94b7b4dd 6479 /* 3. Handle explicit realignment if necessary/supported.
6480 Create in loop:
6481 vec_dest = realign_load (msq, lsq, realignment_token) */
6482 if (alignment_support_scheme == dr_explicit_realign_optimized
6483 || alignment_support_scheme == dr_explicit_realign)
fb85abff 6484 {
94b7b4dd 6485 lsq = gimple_assign_lhs (new_stmt);
6486 if (!realignment_token)
6487 realignment_token = dataref_ptr;
6488 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6489 new_stmt
446e85eb 6490 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6491 vec_dest, msq, lsq,
6492 realignment_token);
94b7b4dd 6493 new_temp = make_ssa_name (vec_dest, new_stmt);
6494 gimple_assign_set_lhs (new_stmt, new_temp);
6495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6496
6497 if (alignment_support_scheme == dr_explicit_realign_optimized)
6498 {
6499 gcc_assert (phi);
6500 if (i == vec_num - 1 && j == ncopies - 1)
6501 add_phi_arg (phi, lsq,
6502 loop_latch_edge (containing_loop),
60d535d2 6503 UNKNOWN_LOCATION);
94b7b4dd 6504 msq = lsq;
6505 }
fb85abff 6506 }
fb85abff 6507
6f75c845 6508 /* 4. Handle invariant-load. */
6509 if (inv_p && !bb_vinfo)
6510 {
6f75c845 6511 gcc_assert (!grouped_load);
c7a8722c 6512 /* If we have versioned for aliasing or the loop doesn't
6513 have any data dependencies that would preclude this,
6514 then we are sure this is a loop invariant load and
6515 thus we can insert it on the preheader edge. */
6516 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6517 && !nested_in_vect_loop
500fffe1 6518 && hoist_defs_of_uses (stmt, loop))
546d1cc8 6519 {
6520 if (dump_enabled_p ())
6521 {
6522 dump_printf_loc (MSG_NOTE, vect_location,
6523 "hoisting out of the vectorized "
6524 "loop: ");
6525 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6526 dump_printf (MSG_NOTE, "\n");
6527 }
6528 tree tem = copy_ssa_name (scalar_dest, NULL);
6529 gsi_insert_on_edge_immediate
6530 (loop_preheader_edge (loop),
6531 gimple_build_assign (tem,
6532 unshare_expr
6533 (gimple_assign_rhs1 (stmt))));
6534 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6535 }
6536 else
6537 {
6538 gimple_stmt_iterator gsi2 = *gsi;
6539 gsi_next (&gsi2);
6540 new_temp = vect_init_vector (stmt, scalar_dest,
6541 vectype, &gsi2);
6542 }
6f75c845 6543 new_stmt = SSA_NAME_DEF_STMT (new_temp);
546d1cc8 6544 set_vinfo_for_stmt (new_stmt,
6545 new_stmt_vec_info (new_stmt, loop_vinfo,
6546 bb_vinfo));
6f75c845 6547 }
6548
94b7b4dd 6549 if (negative)
6550 {
16dfb112 6551 tree perm_mask = perm_mask_for_reverse (vectype);
6552 new_temp = permute_vec_elements (new_temp, new_temp,
6553 perm_mask, stmt, gsi);
fb85abff 6554 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6555 }
ff99c834 6556
94b7b4dd 6557 /* Collect vector loads and later create their permutation in
ee612634 6558 vect_transform_grouped_load (). */
6559 if (grouped_load || slp_perm)
f1f41a6c 6560 dr_chain.quick_push (new_temp);
ff99c834 6561
94b7b4dd 6562 /* Store vector loads in the corresponding SLP_NODE. */
6563 if (slp && !slp_perm)
f1f41a6c 6564 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
94b7b4dd 6565 }
8f3e4987 6566 /* Bump the vector pointer to account for a gap. */
6567 if (slp && group_gap != 0)
6568 {
6569 tree bump = size_binop (MULT_EXPR,
6570 TYPE_SIZE_UNIT (elem_type),
6571 size_int (group_gap));
6572 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6573 stmt, bump);
6574 }
fb85abff 6575 }
6576
6577 if (slp && !slp_perm)
6578 continue;
6579
6580 if (slp_perm)
6581 {
678e3d6e 6582 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
fb85abff 6583 slp_node_instance, false))
6584 {
f1f41a6c 6585 dr_chain.release ();
fb85abff 6586 return false;
6587 }
6588 }
6589 else
6590 {
ee612634 6591 if (grouped_load)
fb85abff 6592 {
94b7b4dd 6593 if (!load_lanes_p)
ee612634 6594 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
fb85abff 6595 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
fb85abff 6596 }
6597 else
6598 {
6599 if (j == 0)
6600 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6601 else
6602 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6603 prev_stmt_info = vinfo_for_stmt (new_stmt);
6604 }
6605 }
f1f41a6c 6606 dr_chain.release ();
fb85abff 6607 }
6608
fb85abff 6609 return true;
6610}
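
/* For illustration only (invented SSA names): under the explicit
   realignment schemes handled in vectorizable_load above, each vector
   load yields LSQ, and the realigned result combines it with the
   previous quadword MSQ:

     lsq_1 = *dataref_ptr_2;
     vect_3 = REALIGN_LOAD_EXPR <msq_0, lsq_1, realignment_token>;

   With dr_explicit_realign_optimized, MSQ is carried between
   iterations through the loop PHI (msq = lsq above), so each load
   feeds the next one.  */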
6611
6612/* Function vect_is_simple_cond.
48e1416a 6613
fb85abff 6614 Input:
6615 LOOP - the loop that is being vectorized.
6616 COND - Condition that is checked for simple use.
6617
d6b19f6b 6618 Output:
6619 *COMP_VECTYPE - the vector type for the comparison.
6620
fb85abff 6621 Returns whether a COND can be vectorized. Checks whether
6622 condition operands are supportable using vec_is_simple_use. */
6623
f39dd90c 6624static bool
bed8b93b 6625vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6626 bb_vec_info bb_vinfo, tree *comp_vectype)
fb85abff 6627{
6628 tree lhs, rhs;
6629 tree def;
6630 enum vect_def_type dt;
d6b19f6b 6631 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
fb85abff 6632
6633 if (!COMPARISON_CLASS_P (cond))
6634 return false;
6635
6636 lhs = TREE_OPERAND (cond, 0);
6637 rhs = TREE_OPERAND (cond, 1);
6638
6639 if (TREE_CODE (lhs) == SSA_NAME)
6640 {
6641 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
bed8b93b 6642 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6643 &lhs_def_stmt, &def, &dt, &vectype1))
fb85abff 6644 return false;
6645 }
6646 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6647 && TREE_CODE (lhs) != FIXED_CST)
6648 return false;
6649
6650 if (TREE_CODE (rhs) == SSA_NAME)
6651 {
6652 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
bed8b93b 6653 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6654 &rhs_def_stmt, &def, &dt, &vectype2))
fb85abff 6655 return false;
6656 }
f2104a54 6657 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
fb85abff 6658 && TREE_CODE (rhs) != FIXED_CST)
6659 return false;
6660
d6b19f6b 6661 *comp_vectype = vectype1 ? vectype1 : vectype2;
fb85abff 6662 return true;
6663}
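
/* For example (illustrative SSA names): for a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   the condition "a_2 < b_3" is simple if each operand is either an
   SSA name whose definition passes vect_is_simple_use_1 or an
   INTEGER_CST/REAL_CST/FIXED_CST; *COMP_VECTYPE is then the vector
   type recorded for whichever operand supplied one.  */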
6664
6665/* vectorizable_condition.
6666
48e1416a 6667 Check if STMT is conditional modify expression that can be vectorized.
6668 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6669 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
0df23b96 6670 at GSI.
6671
6672 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6673 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6674 the else clause if it is 2).
fb85abff 6675
6676 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6677
0df23b96 6678bool
fb85abff 6679vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
f2104a54 6680 gimple *vec_stmt, tree reduc_def, int reduc_index,
6681 slp_tree slp_node)
fb85abff 6682{
6683 tree scalar_dest = NULL_TREE;
6684 tree vec_dest = NULL_TREE;
fb85abff 6685 tree cond_expr, then_clause, else_clause;
6686 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6687 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
57fb023f 6688 tree comp_vectype = NULL_TREE;
282bf14c 6689 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6690 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
fb85abff 6691 tree vec_compare, vec_cond_expr;
6692 tree new_temp;
6693 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
fb85abff 6694 tree def;
f05c3393 6695 enum vect_def_type dt, dts[4];
fb85abff 6696 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
f2104a54 6697 int ncopies;
fb85abff 6698 enum tree_code code;
f05c3393 6699 stmt_vec_info prev_stmt_info = NULL;
f2104a54 6700 int i, j;
6701 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1e094109 6702 vec<tree> vec_oprnds0 = vNULL;
6703 vec<tree> vec_oprnds1 = vNULL;
6704 vec<tree> vec_oprnds2 = vNULL;
6705 vec<tree> vec_oprnds3 = vNULL;
43d7caca 6706 tree vec_cmp_type;
48e1416a 6707
f2104a54 6708 if (slp_node || PURE_SLP_STMT (stmt_info))
6709 ncopies = 1;
6710 else
6711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
bc937a44 6712
fb85abff 6713 gcc_assert (ncopies >= 1);
f05c3393 6714 if (reduc_index && ncopies > 1)
fb85abff 6715 return false; /* FORNOW */
6716
f2104a54 6717 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6718 return false;
6719
6720 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
fb85abff 6721 return false;
6722
0df23b96 6723 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6724 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6725 && reduc_def))
fb85abff 6726 return false;
6727
fb85abff 6728 /* FORNOW: not yet supported. */
48e1416a 6729 if (STMT_VINFO_LIVE_P (stmt_info))
fb85abff 6730 {
6d8fb6cf 6731 if (dump_enabled_p ())
7bd765d4 6732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 6733 "value used after loop.\n");
fb85abff 6734 return false;
6735 }
6736
6737 /* Is vectorizable conditional operation? */
6738 if (!is_gimple_assign (stmt))
6739 return false;
6740
6741 code = gimple_assign_rhs_code (stmt);
6742
6743 if (code != COND_EXPR)
6744 return false;
6745
8a2caf10 6746 cond_expr = gimple_assign_rhs1 (stmt);
6747 then_clause = gimple_assign_rhs2 (stmt);
6748 else_clause = gimple_assign_rhs3 (stmt);
fb85abff 6749
bed8b93b 6750 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6751 &comp_vectype)
d6b19f6b 6752 || !comp_vectype)
fb85abff 6753 return false;
6754
6755 if (TREE_CODE (then_clause) == SSA_NAME)
6756 {
6757 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
bed8b93b 6758 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
fb85abff 6759 &then_def_stmt, &def, &dt))
6760 return false;
6761 }
48e1416a 6762 else if (TREE_CODE (then_clause) != INTEGER_CST
fb85abff 6763 && TREE_CODE (then_clause) != REAL_CST
6764 && TREE_CODE (then_clause) != FIXED_CST)
6765 return false;
6766
6767 if (TREE_CODE (else_clause) == SSA_NAME)
6768 {
6769 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
bed8b93b 6770 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
fb85abff 6771 &else_def_stmt, &def, &dt))
6772 return false;
6773 }
48e1416a 6774 else if (TREE_CODE (else_clause) != INTEGER_CST
fb85abff 6775 && TREE_CODE (else_clause) != REAL_CST
6776 && TREE_CODE (else_clause) != FIXED_CST)
6777 return false;
6778
43d7caca 6779 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6780 /* The result of a vector comparison should be of signed integer type. */
6781 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6782 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6783 if (vec_cmp_type == NULL_TREE)
6784 return false;
f72ca119 6785
48e1416a 6786 if (!vec_stmt)
fb85abff 6787 {
6788 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
d6b19f6b 6789 return expand_vec_cond_expr_p (vectype, comp_vectype);
fb85abff 6790 }
6791
f2104a54 6792 /* Transform. */
6793
6794 if (!slp_node)
6795 {
f1f41a6c 6796 vec_oprnds0.create (1);
6797 vec_oprnds1.create (1);
6798 vec_oprnds2.create (1);
6799 vec_oprnds3.create (1);
f2104a54 6800 }
fb85abff 6801
6802 /* Handle def. */
6803 scalar_dest = gimple_assign_lhs (stmt);
6804 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6805
6806 /* Handle cond expr. */
f05c3393 6807 for (j = 0; j < ncopies; j++)
6808 {
f2104a54 6809 gimple new_stmt = NULL;
f05c3393 6810 if (j == 0)
6811 {
f2104a54 6812 if (slp_node)
6813 {
4997014d 6814 auto_vec<tree, 4> ops;
6815 auto_vec<vec<tree>, 4> vec_defs;
f1f41a6c 6816
f1f41a6c 6817 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6818 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6819 ops.safe_push (then_clause);
6820 ops.safe_push (else_clause);
f2104a54 6821 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7f7695a7 6822 vec_oprnds3 = vec_defs.pop ();
6823 vec_oprnds2 = vec_defs.pop ();
6824 vec_oprnds1 = vec_defs.pop ();
6825 vec_oprnds0 = vec_defs.pop ();
f2104a54 6826
f1f41a6c 6827 ops.release ();
6828 vec_defs.release ();
f2104a54 6829 }
6830 else
6831 {
6832 gimple gtemp;
6833 vec_cond_lhs =
f05c3393 6834 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6835 stmt, NULL);
bed8b93b 6836 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6837 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
f2104a54 6838
6839 vec_cond_rhs =
6840 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6841 stmt, NULL);
bed8b93b 6842 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6843 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
f2104a54 6844 if (reduc_index == 1)
6845 vec_then_clause = reduc_def;
6846 else
6847 {
6848 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6849 stmt, NULL);
bed8b93b 6850 vect_is_simple_use (then_clause, stmt, loop_vinfo,
f2104a54 6851 NULL, &gtemp, &def, &dts[2]);
6852 }
6853 if (reduc_index == 2)
6854 vec_else_clause = reduc_def;
6855 else
6856 {
6857 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
f05c3393 6858 stmt, NULL);
bed8b93b 6859 vect_is_simple_use (else_clause, stmt, loop_vinfo,
f05c3393 6860 NULL, &gtemp, &def, &dts[3]);
f2104a54 6861 }
f05c3393 6862 }
6863 }
6864 else
6865 {
f2104a54 6866 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
f1f41a6c 6867 vec_oprnds0.pop ());
f2104a54 6868 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
f1f41a6c 6869 vec_oprnds1.pop ());
f05c3393 6870 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
f1f41a6c 6871 vec_oprnds2.pop ());
f05c3393 6872 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
f1f41a6c 6873 vec_oprnds3.pop ());
f2104a54 6874 }
6875
6876 if (!slp_node)
6877 {
f1f41a6c 6878 vec_oprnds0.quick_push (vec_cond_lhs);
6879 vec_oprnds1.quick_push (vec_cond_rhs);
6880 vec_oprnds2.quick_push (vec_then_clause);
6881 vec_oprnds3.quick_push (vec_else_clause);
f05c3393 6882 }
6883
09e31a48 6884 /* Arguments are ready. Create the new vector stmt. */
f1f41a6c 6885 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
f2104a54 6886 {
f1f41a6c 6887 vec_cond_rhs = vec_oprnds1[i];
6888 vec_then_clause = vec_oprnds2[i];
6889 vec_else_clause = vec_oprnds3[i];
f05c3393 6890
f72ca119 6891 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6892 vec_cond_lhs, vec_cond_rhs);
f2104a54 6893 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6894 vec_compare, vec_then_clause, vec_else_clause);
f05c3393 6895
f2104a54 6896 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6897 new_temp = make_ssa_name (vec_dest, new_stmt);
6898 gimple_assign_set_lhs (new_stmt, new_temp);
6899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6900 if (slp_node)
f1f41a6c 6901 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
f2104a54 6902 }
6903
6904 if (slp_node)
6905 continue;
6906
6907 if (j == 0)
6908 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6909 else
6910 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6911
6912 prev_stmt_info = vinfo_for_stmt (new_stmt);
f05c3393 6913 }
48e1416a 6914
f1f41a6c 6915 vec_oprnds0.release ();
6916 vec_oprnds1.release ();
6917 vec_oprnds2.release ();
6918 vec_oprnds3.release ();
f2104a54 6919
fb85abff 6920 return true;
6921}
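
/* A minimal sketch (illustrative SSA names) of the transform performed
   by vectorizable_condition: the scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes, per vector copy,

     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   where the embedded comparison is built in the signed integer vector
   type vec_cmp_type computed during analysis.  */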
6922
6923
f083cd24 6924/* Make sure the statement is vectorizable. */
fb85abff 6925
6926bool
37545e54 6927vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
fb85abff 6928{
f083cd24 6929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
37545e54 6930 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
48e1416a 6931 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
fb85abff 6932 bool ok;
37545e54 6933 tree scalar_type, vectype;
18937389 6934 gimple pattern_stmt;
6935 gimple_seq pattern_def_seq;
fb85abff 6936
6d8fb6cf 6937 if (dump_enabled_p ())
fb85abff 6938 {
7bd765d4 6939 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6940 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 6941 dump_printf (MSG_NOTE, "\n");
f083cd24 6942 }
fb85abff 6943
9908a112 6944 if (gimple_has_volatile_ops (stmt))
48e1416a 6945 {
6d8fb6cf 6946 if (dump_enabled_p ())
7bd765d4 6947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 6948 "not vectorized: stmt has volatile operands\n");
9908a112 6949
6950 return false;
6951 }
48e1416a 6952
6953 /* Skip stmts that do not need to be vectorized. In loops this is expected
f083cd24 6954 to include:
6955 - the COND_EXPR which is the loop exit condition
6956 - any LABEL_EXPRs in the loop
48e1416a 6957 - computations that are used only for array indexing or loop control.
f083cd24 6958 In basic blocks we only analyze statements that are a part of some SLP
8bf58742 6959 instance, therefore, all the statements are relevant.
fb85abff 6960
b0f64919 6961 A pattern statement needs to be analyzed instead of the original
8bf58742 6962 statement if the original statement is not relevant. Otherwise, we
127cb1cd 6963 analyze both statements. In basic blocks we are called from some SLP
 6964 instance traversal; there we don't analyze pattern stmts separately,
 6965 since the pattern stmts are already part of the SLP instance. */
8bf58742 6966
6967 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
48e1416a 6968 if (!STMT_VINFO_RELEVANT_P (stmt_info)
f083cd24 6969 && !STMT_VINFO_LIVE_P (stmt_info))
fb85abff 6970 {
cfdcf183 6971 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8bf58742 6972 && pattern_stmt
cfdcf183 6973 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6974 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6975 {
8bf58742 6976 /* Analyze PATTERN_STMT instead of the original stmt. */
cfdcf183 6977 stmt = pattern_stmt;
6978 stmt_info = vinfo_for_stmt (pattern_stmt);
6d8fb6cf 6979 if (dump_enabled_p ())
cfdcf183 6980 {
7bd765d4 6981 dump_printf_loc (MSG_NOTE, vect_location,
6982 "==> examining pattern statement: ");
6983 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 6984 dump_printf (MSG_NOTE, "\n");
cfdcf183 6985 }
6986 }
6987 else
6988 {
6d8fb6cf 6989 if (dump_enabled_p ())
78bb46f5 6990 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
fb85abff 6991
cfdcf183 6992 return true;
6993 }
f083cd24 6994 }
8bf58742 6995 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
127cb1cd 6996 && node == NULL
8bf58742 6997 && pattern_stmt
6998 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6999 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7000 {
7001 /* Analyze PATTERN_STMT too. */
6d8fb6cf 7002 if (dump_enabled_p ())
8bf58742 7003 {
7bd765d4 7004 dump_printf_loc (MSG_NOTE, vect_location,
7005 "==> examining pattern statement: ");
7006 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 7007 dump_printf (MSG_NOTE, "\n");
8bf58742 7008 }
7009
7010 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7011 return false;
7012 }
fb85abff 7013
45eea33f 7014 if (is_pattern_stmt_p (stmt_info)
127cb1cd 7015 && node == NULL
18937389 7016 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
45eea33f 7017 {
18937389 7018 gimple_stmt_iterator si;
45eea33f 7019
18937389 7020 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7021 {
7022 gimple pattern_def_stmt = gsi_stmt (si);
7023 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7024 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7025 {
7026 /* Analyze def stmt of STMT if it's a pattern stmt. */
6d8fb6cf 7027 if (dump_enabled_p ())
18937389 7028 {
7bd765d4 7029 dump_printf_loc (MSG_NOTE, vect_location,
7030 "==> examining pattern def statement: ");
7031 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
78bb46f5 7032 dump_printf (MSG_NOTE, "\n");
18937389 7033 }
45eea33f 7034
18937389 7035 if (!vect_analyze_stmt (pattern_def_stmt,
7036 need_to_vectorize, node))
7037 return false;
7038 }
7039 }
7040 }
45eea33f 7041
f083cd24 7042 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7043 {
7044 case vect_internal_def:
7045 break;
fb85abff 7046
f083cd24 7047 case vect_reduction_def:
ade2ac53 7048 case vect_nested_cycle:
37545e54 7049 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
f083cd24 7050 || relevance == vect_used_in_outer_by_reduction
37545e54 7051 || relevance == vect_unused_in_scope));
f083cd24 7052 break;
7053
7054 case vect_induction_def:
7055 case vect_constant_def:
7056 case vect_external_def:
7057 case vect_unknown_def_type:
7058 default:
7059 gcc_unreachable ();
7060 }
fb85abff 7061
37545e54 7062 if (bb_vinfo)
7063 {
7064 gcc_assert (PURE_SLP_STMT (stmt_info));
7065
b334cbba 7066 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
6d8fb6cf 7067 if (dump_enabled_p ())
37545e54 7068 {
7bd765d4 7069 dump_printf_loc (MSG_NOTE, vect_location,
7070 "get vectype for scalar type: ");
7071 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
78bb46f5 7072 dump_printf (MSG_NOTE, "\n");
37545e54 7073 }
7074
7075 vectype = get_vectype_for_scalar_type (scalar_type);
7076 if (!vectype)
7077 {
6d8fb6cf 7078 if (dump_enabled_p ())
37545e54 7079 {
7bd765d4 7080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7081 "not SLPed: unsupported data-type ");
7082 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7083 scalar_type);
78bb46f5 7084 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
37545e54 7085 }
7086 return false;
7087 }
7088
6d8fb6cf 7089 if (dump_enabled_p ())
37545e54 7090 {
7bd765d4 7091 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7092 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
78bb46f5 7093 dump_printf (MSG_NOTE, "\n");
37545e54 7094 }
7095
7096 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7097 }
7098
f083cd24 7099 if (STMT_VINFO_RELEVANT_P (stmt_info))
fb85abff 7100 {
f083cd24 7101 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
d09768a4 7102 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7103 || (is_gimple_call (stmt)
7104 && gimple_call_lhs (stmt) == NULL_TREE));
f083cd24 7105 *need_to_vectorize = true;
fb85abff 7106 }
7107
f083cd24 7108 ok = true;
48e1416a 7109 if (!bb_vinfo
37545e54 7110 && (STMT_VINFO_RELEVANT_P (stmt_info)
7111 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
d09768a4 7112 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7113 || vectorizable_conversion (stmt, NULL, NULL, NULL)
09e31a48 7114 || vectorizable_shift (stmt, NULL, NULL, NULL)
f083cd24 7115 || vectorizable_operation (stmt, NULL, NULL, NULL)
7116 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7117 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
b57a47d0 7118 || vectorizable_call (stmt, NULL, NULL, NULL)
f083cd24 7119 || vectorizable_store (stmt, NULL, NULL, NULL)
eefa05c8 7120 || vectorizable_reduction (stmt, NULL, NULL, NULL)
f2104a54 7121 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
37545e54 7122 else
7123 {
7124 if (bb_vinfo)
d09768a4 7125 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7126 || vectorizable_conversion (stmt, NULL, NULL, node)
7d96a007 7127 || vectorizable_shift (stmt, NULL, NULL, node)
09e31a48 7128 || vectorizable_operation (stmt, NULL, NULL, node)
37545e54 7129 || vectorizable_assignment (stmt, NULL, NULL, node)
7130 || vectorizable_load (stmt, NULL, NULL, node, NULL)
b57a47d0 7131 || vectorizable_call (stmt, NULL, NULL, node)
f2104a54 7132 || vectorizable_store (stmt, NULL, NULL, node)
7133 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
48e1416a 7134 }
f083cd24 7135
7136 if (!ok)
fb85abff 7137 {
6d8fb6cf 7138 if (dump_enabled_p ())
f083cd24 7139 {
7bd765d4 7140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7141 "not vectorized: relevant stmt not ");
7142 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7143 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 7144 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f083cd24 7145 }
48e1416a 7146
fb85abff 7147 return false;
7148 }
7149
37545e54 7150 if (bb_vinfo)
7151 return true;
7152
f083cd24 7153 /* Stmts that are (also) "live" (i.e., used outside the loop)
7154 need extra handling, except for vectorizable reductions. */
7155 if (STMT_VINFO_LIVE_P (stmt_info)
7156 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7157 ok = vectorizable_live_operation (stmt, NULL, NULL);
fb85abff 7158
f083cd24 7159 if (!ok)
fb85abff 7160 {
6d8fb6cf 7161 if (dump_enabled_p ())
f083cd24 7162 {
7bd765d4 7163 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7164 "not vectorized: live stmt not ");
7165 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7166 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 7167 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f083cd24 7168 }
48e1416a 7169
f083cd24 7170 return false;
fb85abff 7171 }
7172
fb85abff 7173 return true;
7174}
7175
7176
7177/* Function vect_transform_stmt.
7178
7179 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7180
7181bool
7182vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
ee612634 7183 bool *grouped_store, slp_tree slp_node,
fb85abff 7184 slp_instance slp_node_instance)
7185{
7186 bool is_store = false;
7187 gimple vec_stmt = NULL;
7188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
fb85abff 7189 bool done;
fb85abff 7190
7191 switch (STMT_VINFO_TYPE (stmt_info))
7192 {
7193 case type_demotion_vec_info_type:
fb85abff 7194 case type_promotion_vec_info_type:
fb85abff 7195 case type_conversion_vec_info_type:
7196 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7197 gcc_assert (done);
7198 break;
7199
7200 case induc_vec_info_type:
7201 gcc_assert (!slp_node);
7202 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7203 gcc_assert (done);
7204 break;
7205
09e31a48 7206 case shift_vec_info_type:
7207 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7208 gcc_assert (done);
7209 break;
7210
fb85abff 7211 case op_vec_info_type:
7212 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7213 gcc_assert (done);
7214 break;
7215
7216 case assignment_vec_info_type:
7217 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7218 gcc_assert (done);
7219 break;
7220
7221 case load_vec_info_type:
48e1416a 7222 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
fb85abff 7223 slp_node_instance);
7224 gcc_assert (done);
7225 break;
7226
7227 case store_vec_info_type:
7228 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7229 gcc_assert (done);
ee612634 7230 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
fb85abff 7231 {
7232 /* In case of interleaving, the whole chain is vectorized when the
282bf14c 7233 last store in the chain is reached. Store stmts before the last
fb85abff 7234 one are skipped, and their stmt_vec_info shouldn't be freed
7235 meanwhile. */
ee612634 7236 *grouped_store = true;
fb85abff 7237 if (STMT_VINFO_VEC_STMT (stmt_info))
7238 is_store = true;
7239 }
7240 else
7241 is_store = true;
7242 break;
7243
7244 case condition_vec_info_type:
f2104a54 7245 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
fb85abff 7246 gcc_assert (done);
7247 break;
7248
7249 case call_vec_info_type:
b57a47d0 7250 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5a3c210a 7251 stmt = gsi_stmt (*gsi);
c71d3c24 7252 if (is_gimple_call (stmt)
7253 && gimple_call_internal_p (stmt)
7254 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7255 is_store = true;
fb85abff 7256 break;
7257
d09768a4 7258 case call_simd_clone_vec_info_type:
7259 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7260 stmt = gsi_stmt (*gsi);
7261 break;
7262
fb85abff 7263 case reduc_vec_info_type:
eefa05c8 7264 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
fb85abff 7265 gcc_assert (done);
7266 break;
7267
7268 default:
7269 if (!STMT_VINFO_LIVE_P (stmt_info))
7270 {
6d8fb6cf 7271 if (dump_enabled_p ())
7bd765d4 7272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 7273 "stmt not supported.\n");
fb85abff 7274 gcc_unreachable ();
7275 }
7276 }
7277
7278 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7279 is being vectorized, but outside the immediately enclosing loop. */
7280 if (vec_stmt
37545e54 7281 && STMT_VINFO_LOOP_VINFO (stmt_info)
7282 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7283 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
fb85abff 7284 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7285 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
48e1416a 7286 || STMT_VINFO_RELEVANT (stmt_info) ==
37545e54 7287 vect_used_in_outer_by_reduction))
fb85abff 7288 {
37545e54 7289 struct loop *innerloop = LOOP_VINFO_LOOP (
7290 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
fb85abff 7291 imm_use_iterator imm_iter;
7292 use_operand_p use_p;
7293 tree scalar_dest;
7294 gimple exit_phi;
7295
6d8fb6cf 7296 if (dump_enabled_p ())
7bd765d4 7297 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 7298 "Record the vdef for outer-loop vectorization.\n");
fb85abff 7299
7300 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7301 (to be used when vectorizing outer-loop stmts that use the DEF of
7302 STMT). */
7303 if (gimple_code (stmt) == GIMPLE_PHI)
7304 scalar_dest = PHI_RESULT (stmt);
7305 else
7306 scalar_dest = gimple_assign_lhs (stmt);
7307
7308 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7309 {
7310 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7311 {
7312 exit_phi = USE_STMT (use_p);
7313 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7314 }
7315 }
7316 }
7317
7318 /* Handle stmts whose DEF is used outside the loop-nest that is
7319 being vectorized. */
7320 if (STMT_VINFO_LIVE_P (stmt_info)
7321 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7322 {
7323 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7324 gcc_assert (done);
7325 }
7326
7327 if (vec_stmt)
8bf58742 7328 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
fb85abff 7329
48e1416a 7330 return is_store;
fb85abff 7331}
7332
7333
48e1416a 7334/* Remove a group of stores (for SLP or interleaving), free their
fb85abff 7335 stmt_vec_info. */
7336
7337void
7338vect_remove_stores (gimple first_stmt)
7339{
7340 gimple next = first_stmt;
7341 gimple tmp;
7342 gimple_stmt_iterator next_si;
7343
7344 while (next)
7345 {
3b515af5 7346 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7347
7348 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7349 if (is_pattern_stmt_p (stmt_info))
7350 next = STMT_VINFO_RELATED_STMT (stmt_info);
fb85abff 7351 /* Free the attached stmt_vec_info and remove the stmt. */
7352 next_si = gsi_for_stmt (next);
bc8a8451 7353 unlink_stmt_vdef (next);
fb85abff 7354 gsi_remove (&next_si, true);
bc8a8451 7355 release_defs (next);
fb85abff 7356 free_stmt_vec_info (next);
7357 next = tmp;
7358 }
7359}
7360
7361
7362/* Function new_stmt_vec_info.
7363
7364 Create and initialize a new stmt_vec_info struct for STMT. */
7365
7366stmt_vec_info
48e1416a 7367new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
37545e54 7368 bb_vec_info bb_vinfo)
fb85abff 7369{
7370 stmt_vec_info res;
7371 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7372
7373 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7374 STMT_VINFO_STMT (res) = stmt;
7375 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
37545e54 7376 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
f083cd24 7377 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
fb85abff 7378 STMT_VINFO_LIVE_P (res) = false;
7379 STMT_VINFO_VECTYPE (res) = NULL;
7380 STMT_VINFO_VEC_STMT (res) = NULL;
6ea6a380 7381 STMT_VINFO_VECTORIZABLE (res) = true;
fb85abff 7382 STMT_VINFO_IN_PATTERN_P (res) = false;
7383 STMT_VINFO_RELATED_STMT (res) = NULL;
18937389 7384 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
fb85abff 7385 STMT_VINFO_DATA_REF (res) = NULL;
7386
7387 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7388 STMT_VINFO_DR_OFFSET (res) = NULL;
7389 STMT_VINFO_DR_INIT (res) = NULL;
7390 STMT_VINFO_DR_STEP (res) = NULL;
7391 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7392
7393 if (gimple_code (stmt) == GIMPLE_PHI
7394 && is_loop_header_bb_p (gimple_bb (stmt)))
7395 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7396 else
f083cd24 7397 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7398
f1f41a6c 7399 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8458f4ca 7400 STMT_SLP_TYPE (res) = loop_vect;
21009880 7401 GROUP_FIRST_ELEMENT (res) = NULL;
7402 GROUP_NEXT_ELEMENT (res) = NULL;
7403 GROUP_SIZE (res) = 0;
7404 GROUP_STORE_COUNT (res) = 0;
7405 GROUP_GAP (res) = 0;
7406 GROUP_SAME_DR_STMT (res) = NULL;
fb85abff 7407
7408 return res;
7409}
7410
7411
7412/* Create the vector holding the stmt_vec_infos. */
7413
7414void
7415init_stmt_vec_info_vec (void)
7416{
f1f41a6c 7417 gcc_assert (!stmt_vec_info_vec.exists ());
7418 stmt_vec_info_vec.create (50);
fb85abff 7419}
7420
7421
7422/* Free the vector holding the stmt_vec_infos. */
7423
7424void
7425free_stmt_vec_info_vec (void)
7426{
6ae8a044 7427 unsigned int i;
7428 vec_void_p info;
7429 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7430 if (info != NULL)
7431 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
f1f41a6c 7432 gcc_assert (stmt_vec_info_vec.exists ());
7433 stmt_vec_info_vec.release ();
fb85abff 7434}
7435
7436
7437/* Free stmt vectorization related info. */
7438
7439void
7440free_stmt_vec_info (gimple stmt)
7441{
7442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7443
7444 if (!stmt_info)
7445 return;
7446
3b515af5 7447 /* Check if this statement has a related "pattern stmt"
7448 (introduced by the vectorizer during the pattern recognition
7449 pass). Free the pattern stmt's stmt_vec_info and the
7450 stmt_vec_infos of its def stmts too. */
7451 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7452 {
7453 stmt_vec_info patt_info
7454 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7455 if (patt_info)
7456 {
18937389 7457 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
bbf7a2bd 7458 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7459 gimple_set_bb (patt_stmt, NULL);
7460 tree lhs = gimple_get_lhs (patt_stmt);
7461 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7462 release_ssa_name (lhs);
18937389 7463 if (seq)
7464 {
7465 gimple_stmt_iterator si;
7466 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
bbf7a2bd 7467 {
7468 gimple seq_stmt = gsi_stmt (si);
7469 gimple_set_bb (seq_stmt, NULL);
7470 lhs = gimple_get_lhs (seq_stmt);
7471 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7472 release_ssa_name (lhs);
7473 free_stmt_vec_info (seq_stmt);
7474 }
18937389 7475 }
bbf7a2bd 7476 free_stmt_vec_info (patt_stmt);
3b515af5 7477 }
7478 }
7479
f1f41a6c 7480 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
fb85abff 7481 set_vinfo_for_stmt (stmt, NULL);
7482 free (stmt_info);
7483}
7484
7485
c4740c5d 7486/* Function get_vectype_for_scalar_type_and_size.
fb85abff 7487
c4740c5d 7488 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
fb85abff 7489 by the target. */
7490
c4740c5d 7491static tree
7492get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
fb85abff 7493{
3754d046 7494 machine_mode inner_mode = TYPE_MODE (scalar_type);
7495 machine_mode simd_mode;
d3791b80 7496 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
fb85abff 7497 int nunits;
7498 tree vectype;
7499
b24d851f 7500 if (nbytes == 0)
fb85abff 7501 return NULL_TREE;
7502
59980d82 7503 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7504 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7505 return NULL_TREE;
7506
6960a794 7507 /* For vector types of elements whose mode precision doesn't
7508 match their type's precision we use an element type of mode
7509 precision. The vectorization routines will have to make sure
59980d82 7510 they support the proper result truncation/extension.
7511 We also make sure to build vector types with INTEGER_TYPE
7512 component type only. */
1538bf19 7513 if (INTEGRAL_TYPE_P (scalar_type)
59980d82 7514 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7515 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6960a794 7516 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7517 TYPE_UNSIGNED (scalar_type));
1538bf19 7518
33c20fdf 7519 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7520 When the component mode passes the above test simply use a type
7521 corresponding to that mode. The theory is that any use that
7522 would cause problems with this will disable vectorization anyway. */
e4a3dae2 7523 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
99f81ffb 7524 && !INTEGRAL_TYPE_P (scalar_type))
c74df0c8 7525 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7526
7527 /* We can't build a vector type of elements with alignment bigger than
7528 their size. */
e4a3dae2 7529 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
50c96f17 7530 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7531 TYPE_UNSIGNED (scalar_type));
33c20fdf 7532
e4a3dae2 7533 /* If we fell back to using the mode, fail if there was
7534 no scalar type for it. */
7535 if (scalar_type == NULL_TREE)
7536 return NULL_TREE;
7537
c4740c5d 7538 /* If no size was supplied use the mode the target prefers. Otherwise
7539 lookup a vector mode of the specified size. */
7540 if (size == 0)
7541 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7542 else
7543 simd_mode = mode_for_vector (inner_mode, size / nbytes);
b24d851f 7544 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7545 if (nunits <= 1)
7546 return NULL_TREE;
fb85abff 7547
7548 vectype = build_vector_type (scalar_type, nunits);
fb85abff 7549
7550 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7551 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
0bf5f81b 7552 return NULL_TREE;
fb85abff 7553
7554 return vectype;
7555}
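
/* For example (assuming a target with 16-byte vectors and 4-byte
   ints): requesting SCALAR_TYPE int with SIZE 16 builds a 4-unit
   vector of int, while SIZE 0 defers the mode choice to
   targetm.vectorize.preferred_simd_mode.  If the resulting mode is
   neither a vector nor an integer mode, NULL_TREE is returned.  */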
7556
c4740c5d 7557unsigned int current_vector_size;
7558
7559/* Function get_vectype_for_scalar_type.
7560
7561 Returns the vector type corresponding to SCALAR_TYPE as supported
7562 by the target. */
7563
7564tree
7565get_vectype_for_scalar_type (tree scalar_type)
7566{
7567 tree vectype;
7568 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7569 current_vector_size);
7570 if (vectype
7571 && current_vector_size == 0)
7572 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7573 return vectype;
7574}
7575
b334cbba 7576/* Function get_same_sized_vectype
7577
7578 Returns a vector type corresponding to SCALAR_TYPE of size
7579 VECTOR_TYPE if supported by the target. */
7580
7581tree
c4740c5d 7582get_same_sized_vectype (tree scalar_type, tree vector_type)
b334cbba 7583{
c4740c5d 7584 return get_vectype_for_scalar_type_and_size
7585 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
b334cbba 7586}
7587
fb85abff 7588/* Function vect_is_simple_use.
7589
7590 Input:
37545e54 7591 LOOP_VINFO - the vect info of the loop that is being vectorized.
7592 BB_VINFO - the vect info of the basic block that is being vectorized.
bed8b93b 7593 OPERAND - operand of STMT in the loop or bb.
fb85abff 7594 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7595
7596 Returns whether a stmt with OPERAND can be vectorized.
48e1416a 7597 For loops, supportable operands are constants, loop invariants, and operands
282bf14c 7598 that are defined by the current iteration of the loop. Unsupportable
48e1416a 7599 operands are those that are defined by a previous iteration of the loop (as
37545e54 7600 is the case in reduction/induction computations).
7601 For basic blocks, supportable operands are constants and bb invariants.
7602 For now, operands defined outside the basic block are not supported. */
fb85abff 7603
7604bool
bed8b93b 7605vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
37545e54 7606 bb_vec_info bb_vinfo, gimple *def_stmt,
fb85abff 7607 tree *def, enum vect_def_type *dt)
48e1416a 7608{
fb85abff 7609 basic_block bb;
7610 stmt_vec_info stmt_vinfo;
37545e54 7611 struct loop *loop = NULL;
48e1416a 7612
37545e54 7613 if (loop_vinfo)
7614 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 7615
7616 *def_stmt = NULL;
7617 *def = NULL_TREE;
48e1416a 7618
6d8fb6cf 7619 if (dump_enabled_p ())
fb85abff 7620 {
7bd765d4 7621 dump_printf_loc (MSG_NOTE, vect_location,
7622 "vect_is_simple_use: operand ");
7623 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
78bb46f5 7624 dump_printf (MSG_NOTE, "\n");
fb85abff 7625 }
48e1416a 7626
ace64c06 7627 if (CONSTANT_CLASS_P (operand))
fb85abff 7628 {
7629 *dt = vect_constant_def;
7630 return true;
7631 }
48e1416a 7632
fb85abff 7633 if (is_gimple_min_invariant (operand))
7634 {
7635 *def = operand;
f083cd24 7636 *dt = vect_external_def;
fb85abff 7637 return true;
7638 }
7639
7640 if (TREE_CODE (operand) == PAREN_EXPR)
7641 {
6d8fb6cf 7642 if (dump_enabled_p ())
78bb46f5 7643 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
fb85abff 7644 operand = TREE_OPERAND (operand, 0);
7645 }
48e1416a 7646
fb85abff 7647 if (TREE_CODE (operand) != SSA_NAME)
7648 {
6d8fb6cf 7649 if (dump_enabled_p ())
7bd765d4 7650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 7651 "not ssa-name.\n");
fb85abff 7652 return false;
7653 }
48e1416a 7654
fb85abff 7655 *def_stmt = SSA_NAME_DEF_STMT (operand);
7656 if (*def_stmt == NULL)
7657 {
6d8fb6cf 7658 if (dump_enabled_p ())
7bd765d4 7659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 7660 "no def_stmt.\n");
fb85abff 7661 return false;
7662 }
7663
6d8fb6cf 7664 if (dump_enabled_p ())
fb85abff 7665 {
7bd765d4 7666 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7667 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
78bb46f5 7668 dump_printf (MSG_NOTE, "\n");
fb85abff 7669 }
7670
f083cd24 7671 /* An empty stmt is expected only in the case of a function argument
fb85abff 7672 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7673 if (gimple_nop_p (*def_stmt))
7674 {
7675 *def = operand;
f083cd24 7676 *dt = vect_external_def;
fb85abff 7677 return true;
7678 }
7679
7680 bb = gimple_bb (*def_stmt);
37545e54 7681
7682 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7683 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
48e1416a 7684 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
f083cd24 7685 *dt = vect_external_def;
fb85abff 7686 else
7687 {
7688 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7689 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7690 }
7691
bed8b93b 7692 if (*dt == vect_unknown_def_type
7693 || (stmt
7694 && *dt == vect_double_reduction_def
7695 && gimple_code (stmt) != GIMPLE_PHI))
fb85abff 7696 {
6d8fb6cf 7697 if (dump_enabled_p ())
7bd765d4 7698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 7699 "Unsupported pattern.\n");
fb85abff 7700 return false;
7701 }
7702
6d8fb6cf 7703 if (dump_enabled_p ())
78bb46f5 7704 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
fb85abff 7705
7706 switch (gimple_code (*def_stmt))
7707 {
7708 case GIMPLE_PHI:
7709 *def = gimple_phi_result (*def_stmt);
7710 break;
7711
7712 case GIMPLE_ASSIGN:
7713 *def = gimple_assign_lhs (*def_stmt);
7714 break;
7715
7716 case GIMPLE_CALL:
7717 *def = gimple_call_lhs (*def_stmt);
7718 if (*def != NULL)
7719 break;
7720 /* FALLTHRU */
7721 default:
6d8fb6cf 7722 if (dump_enabled_p ())
7bd765d4 7723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 7724 "unsupported defining stmt:\n");
fb85abff 7725 return false;
7726 }
7727
7728 return true;
7729}
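
/* For example: a constant operand is classified as vect_constant_def;
   an SSA name defined outside LOOP (or, for basic blocks, outside the
   block or by a PHI) is vect_external_def; otherwise the def type
   comes from the defining stmt's STMT_VINFO_DEF_TYPE.  */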
7730
b334cbba 7731/* Function vect_is_simple_use_1.
7732
7733 Same as vect_is_simple_use_1 but also determines the vector operand
7734 type of OPERAND and stores it to *VECTYPE. If the definition of
7735 OPERAND is vect_uninitialized_def, vect_constant_def or
7736 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7737 is responsible for computing the best suited vector type for the
7738 scalar operand. */
7739
7740bool
bed8b93b 7741vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
b334cbba 7742 bb_vec_info bb_vinfo, gimple *def_stmt,
7743 tree *def, enum vect_def_type *dt, tree *vectype)
7744{
bed8b93b 7745 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7746 def, dt))
b334cbba 7747 return false;
7748
7749 /* Now get a vector type if the def is internal, otherwise supply
7750 NULL_TREE and leave it up to the caller to figure out a proper
7751 type for the use stmt. */
7752 if (*dt == vect_internal_def
7753 || *dt == vect_induction_def
7754 || *dt == vect_reduction_def
7755 || *dt == vect_double_reduction_def
7756 || *dt == vect_nested_cycle)
7757 {
7758 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8bf58742 7759
7760 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7761 && !STMT_VINFO_RELEVANT (stmt_info)
7762 && !STMT_VINFO_LIVE_P (stmt_info))
b334cbba 7763 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8bf58742 7764
b334cbba 7765 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7766 gcc_assert (*vectype != NULL_TREE);
7767 }
7768 else if (*dt == vect_uninitialized_def
7769 || *dt == vect_constant_def
7770 || *dt == vect_external_def)
7771 *vectype = NULL_TREE;
7772 else
7773 gcc_unreachable ();
7774
7775 return true;
7776}
7777
fb85abff 7778
7779/* Function supportable_widening_operation
7780
48e1416a 7781 Check whether an operation represented by the code CODE is a
7782 widening operation that is supported by the target platform in
b334cbba 7783 vector form (i.e., when operating on arguments of type VECTYPE_IN
7784 producing a result of type VECTYPE_OUT).
48e1416a 7785
fb85abff 7786 Widening operations we currently support are NOP (CONVERT), FLOAT
7787 and WIDEN_MULT. This function checks if these operations are supported
7788 by the target platform either directly (via vector tree-codes), or via
7789 target builtins.
7790
7791 Output:
48e1416a 7792 - CODE1 and CODE2 are codes of vector operations to be used when
7793 vectorizing the operation, if available.
fb85abff 7794 - MULTI_STEP_CVT determines the number of required intermediate steps in
7795 case of multi-step conversion (like char->short->int - in that case
7796 MULTI_STEP_CVT will be 1).
48e1416a 7797 - INTERM_TYPES contains the intermediate type required to perform the
7798 widening operation (short in the above example). */
fb85abff 7799
7800bool
b334cbba 7801supportable_widening_operation (enum tree_code code, gimple stmt,
7802 tree vectype_out, tree vectype_in,
fb85abff 7803 enum tree_code *code1, enum tree_code *code2,
7804 int *multi_step_cvt,
f1f41a6c 7805 vec<tree> *interm_types)
fb85abff 7806{
7807 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7808 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
03dab834 7809 struct loop *vect_loop = NULL;
3754d046 7810 machine_mode vec_mode;
bc620c5c 7811 enum insn_code icode1, icode2;
fb85abff 7812 optab optab1, optab2;
b334cbba 7813 tree vectype = vectype_in;
7814 tree wide_vectype = vectype_out;
fb85abff 7815 enum tree_code c1, c2;
7d96a007 7816 int i;
7817 tree prev_type, intermediate_type;
3754d046 7818 machine_mode intermediate_mode, prev_mode;
7d96a007 7819 optab optab3, optab4;
fb85abff 7820
7d96a007 7821 *multi_step_cvt = 0;
03dab834 7822 if (loop_info)
7823 vect_loop = LOOP_VINFO_LOOP (loop_info);
7824
fb85abff 7825 switch (code)
7826 {
7827 case WIDEN_MULT_EXPR:
04fb053a 7828 /* The result of a vectorized widening operation usually requires
7829 two vectors (because the widened results do not fit into one vector).
7830 The generated vector results would normally be expected to be
7831 generated in the same order as in the original scalar computation,
7832 i.e. if 8 results are generated in each vector iteration, they are
7833 to be organized as follows:
7834 vect1: [res1,res2,res3,res4],
7835 vect2: [res5,res6,res7,res8].
7836
7837 However, in the special case that the result of the widening
7838 operation is used in a reduction computation only, the order doesn't
7839 matter (because when vectorizing a reduction we change the order of
7840 the computation). Some targets can take advantage of this and
7841 generate more efficient code. For example, targets like Altivec,
7842 that support widen_mult using a sequence of {mult_even,mult_odd}
7843 generate the following vectors:
7844 vect1: [res1,res3,res5,res7],
7845 vect2: [res2,res4,res6,res8].
7846
7847 When vectorizing outer-loops, we execute the inner-loop sequentially
7848 (each vectorized inner-loop iteration contributes to VF outer-loop
7849 iterations in parallel). We therefore don't allow changing the
7850 order of the computation in the inner-loop during outer-loop
7851 vectorization. */
7852 /* TODO: Another case in which order doesn't *really* matter is when we
7853 widen and then contract again, e.g. (short)((int)x * y >> 8).
7854 Normally, pack_trunc performs an even/odd permute, whereas the
7855 repack from an even/odd expansion would be an interleave, which
7856 would be significantly simpler for e.g. AVX2. */
7857 /* In any case, in order to avoid duplicating the code below, recurse
7858 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7859 are properly set up for the caller. If we fail, we'll continue with
7860 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7861 if (vect_loop
7862 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7863 && !nested_in_vect_loop_p (vect_loop, stmt)
7864 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7865 stmt, vectype_out, vectype_in,
087dde2d 7866 code1, code2, multi_step_cvt,
7867 interm_types))
4fc29ce4 7868 {
7869 /* Elements in a vector with vect_used_by_reduction property cannot
7870 be reordered if the use chain with this property does not have the
7871 same operation. One such example is s += a * b, where elements
7872 in a and b cannot be reordered. Here we check if the vector defined
7873 by STMT is only directly used in the reduction statement. */
7874 tree lhs = gimple_assign_lhs (stmt);
7875 use_operand_p dummy;
7876 gimple use_stmt;
7877 stmt_vec_info use_stmt_info = NULL;
7878 if (single_imm_use (lhs, &dummy, &use_stmt)
7879 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7880 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7881 return true;
7882 }
7d96a007 7883 c1 = VEC_WIDEN_MULT_LO_EXPR;
7884 c2 = VEC_WIDEN_MULT_HI_EXPR;
fb85abff 7885 break;
7886
04fb053a 7887 case VEC_WIDEN_MULT_EVEN_EXPR:
7888 /* Support the recursion induced just above. */
7889 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7890 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7891 break;
7892
6083c152 7893 case WIDEN_LSHIFT_EXPR:
7d96a007 7894 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7895 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6083c152 7896 break;
7897
fb85abff 7898 CASE_CONVERT:
7d96a007 7899 c1 = VEC_UNPACK_LO_EXPR;
7900 c2 = VEC_UNPACK_HI_EXPR;
fb85abff 7901 break;
7902
7903 case FLOAT_EXPR:
7d96a007 7904 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7905 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
fb85abff 7906 break;
7907
7908 case FIX_TRUNC_EXPR:
7909 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7910 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7911 computing the operation. */
7912 return false;
7913
7914 default:
7915 gcc_unreachable ();
7916 }
7917
04fb053a 7918 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7d96a007 7919 {
7920 enum tree_code ctmp = c1;
7921 c1 = c2;
7922 c2 = ctmp;
7923 }
7924
fb85abff 7925 if (code == FIX_TRUNC_EXPR)
7926 {
7927 /* The signedness is determined from output operand. */
b334cbba 7928 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7929 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
fb85abff 7930 }
7931 else
7932 {
7933 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7934 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7935 }
7936
7937 if (!optab1 || !optab2)
7938 return false;
7939
7940 vec_mode = TYPE_MODE (vectype);
d6bf3b14 7941 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7942 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
fb85abff 7943 return false;
7944
7d96a007 7945 *code1 = c1;
7946 *code2 = c2;
7947
7948 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7949 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7950 return true;
7951
48e1416a 7952 /* Check if it's a multi-step conversion that can be done using intermediate
fb85abff 7953 types. */
fb85abff 7954
7d96a007 7955 prev_type = vectype;
7956 prev_mode = vec_mode;
48e1416a 7957
7d96a007 7958 if (!CONVERT_EXPR_CODE_P (code))
7959 return false;
48e1416a 7960
7d96a007 7961 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7962 intermediate steps in the promotion sequence. We try
7963 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7964 not. */
f1f41a6c 7965 interm_types->create (MAX_INTERM_CVT_STEPS);
7d96a007 7966 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7967 {
7968 intermediate_mode = insn_data[icode1].operand[0].mode;
7969 intermediate_type
7970 = lang_hooks.types.type_for_mode (intermediate_mode,
7971 TYPE_UNSIGNED (prev_type));
7972 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7973 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7974
7975 if (!optab3 || !optab4
7976 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7977 || insn_data[icode1].operand[0].mode != intermediate_mode
7978 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7979 || insn_data[icode2].operand[0].mode != intermediate_mode
7980 || ((icode1 = optab_handler (optab3, intermediate_mode))
7981 == CODE_FOR_nothing)
7982 || ((icode2 = optab_handler (optab4, intermediate_mode))
7983 == CODE_FOR_nothing))
7984 break;
fb85abff 7985
f1f41a6c 7986 interm_types->quick_push (intermediate_type);
7d96a007 7987 (*multi_step_cvt)++;
7988
7989 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7990 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7991 return true;
7992
7993 prev_type = intermediate_type;
7994 prev_mode = intermediate_mode;
fb85abff 7995 }
7996
f1f41a6c 7997 interm_types->release ();
7d96a007 7998 return false;
fb85abff 7999}
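
/* Illustrative example: a char -> int conversion on a target that can
   only unpack one step at a time goes through the intermediate type
   short, so on success *MULTI_STEP_CVT is 1 and INTERM_TYPES holds the
   short vector type; for CASE_CONVERT the codes are
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (swapped on big-endian
   targets).  */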
8000
8001
8002/* Function supportable_narrowing_operation
8003
48e1416a 8004 Check whether an operation represented by the code CODE is a
8005 narrowing operation that is supported by the target platform in
b334cbba 8006 vector form (i.e., when operating on arguments of type VECTYPE_IN
8007 and producing a result of type VECTYPE_OUT).
48e1416a 8008
fb85abff 8009 Narrowing operations we currently support are NOP (CONVERT) and
282bf14c 8010 FIX_TRUNC. This function checks if these operations are supported by
fb85abff 8011 the target platform directly via vector tree-codes.
8012
8013 Output:
48e1416a 8014 - CODE1 is the code of a vector operation to be used when
8015 vectorizing the operation, if available.
fb85abff 8016 - MULTI_STEP_CVT determines the number of required intermediate steps in
8017 case of multi-step conversion (like int->short->char - in that case
8018 MULTI_STEP_CVT will be 1).
8019 - INTERM_TYPES contains the intermediate type required to perform the
48e1416a 8020 narrowing operation (short in the above example). */
fb85abff 8021
8022bool
8023supportable_narrowing_operation (enum tree_code code,
b334cbba 8024 tree vectype_out, tree vectype_in,
fb85abff 8025 enum tree_code *code1, int *multi_step_cvt,
f1f41a6c 8026 vec<tree> *interm_types)
fb85abff 8027{
3754d046 8028 machine_mode vec_mode;
fb85abff 8029 enum insn_code icode1;
8030 optab optab1, interm_optab;
b334cbba 8031 tree vectype = vectype_in;
8032 tree narrow_vectype = vectype_out;
fb85abff 8033 enum tree_code c1;
7d96a007 8034 tree intermediate_type;
3754d046 8035 machine_mode intermediate_mode, prev_mode;
fb85abff 8036 int i;
7d96a007 8037 bool uns;
fb85abff 8038
7d96a007 8039 *multi_step_cvt = 0;
fb85abff 8040 switch (code)
8041 {
8042 CASE_CONVERT:
8043 c1 = VEC_PACK_TRUNC_EXPR;
8044 break;
8045
8046 case FIX_TRUNC_EXPR:
8047 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8048 break;
8049
8050 case FLOAT_EXPR:
8051 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8052 tree code and optabs used for computing the operation. */
8053 return false;
8054
8055 default:
8056 gcc_unreachable ();
8057 }
8058
8059 if (code == FIX_TRUNC_EXPR)
8060 /* The signedness is determined from output operand. */
b334cbba 8061 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
fb85abff 8062 else
8063 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8064
8065 if (!optab1)
8066 return false;
8067
8068 vec_mode = TYPE_MODE (vectype);
d6bf3b14 8069 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
fb85abff 8070 return false;
8071
7d96a007 8072 *code1 = c1;
8073
8074 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8075 return true;
8076
fb85abff 8077 /* Check if it's a multi-step conversion that can be done using intermediate
8078 types. */
7d96a007 8079 prev_mode = vec_mode;
8080 if (code == FIX_TRUNC_EXPR)
8081 uns = TYPE_UNSIGNED (vectype_out);
8082 else
8083 uns = TYPE_UNSIGNED (vectype);
8084
8085 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8086 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8087 costly than signed. */
8088 if (code == FIX_TRUNC_EXPR && uns)
8089 {
8090 enum insn_code icode2;
8091
8092 intermediate_type
8093 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8094 interm_optab
8095 = optab_for_tree_code (c1, intermediate_type, optab_default);
6cdd383a 8096 if (interm_optab != unknown_optab
7d96a007 8097 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8098 && insn_data[icode1].operand[0].mode
8099 == insn_data[icode2].operand[0].mode)
8100 {
8101 uns = false;
8102 optab1 = interm_optab;
8103 icode1 = icode2;
8104 }
8105 }
fb85abff 8106
7d96a007 8107 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8108 intermediate steps in the narrowing sequence. We try
8109 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
f1f41a6c 8110 interm_types->create (MAX_INTERM_CVT_STEPS);
7d96a007 8111 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8112 {
8113 intermediate_mode = insn_data[icode1].operand[0].mode;
8114 intermediate_type
8115 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8116 interm_optab
8117 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8118 optab_default);
8119 if (!interm_optab
8120 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8121 || insn_data[icode1].operand[0].mode != intermediate_mode
8122 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8123 == CODE_FOR_nothing))
8124 break;
8125
f1f41a6c 8126 interm_types->quick_push (intermediate_type);
7d96a007 8127 (*multi_step_cvt)++;
8128
8129 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8130 return true;
8131
8132 prev_mode = intermediate_mode;
8133 optab1 = interm_optab;
fb85abff 8134 }
8135
f1f41a6c 8136 interm_types->release ();
7d96a007 8137 return false;
fb85abff 8138}
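
/* Illustrative example: the mirror case int -> char narrows through
   short using VEC_PACK_TRUNC_EXPR at each step, so on success *CODE1
   is VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT is 1 and INTERM_TYPES holds
   the short vector type.  */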