/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"


/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
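
/* Illustrative example (an added sketch, not part of the original file):
   in a loop like

        s_1 = 0;
        loop:
          a[i_2] = b[i_2] + 1;    <-- has a vdef: relevant
          s_3 = s_1 + b[i_2];     <-- used after the loop via an exit phi: live
        exit:
          use (s_3);

   the store is marked vect_used_in_scope because it alters memory, while
   the stmt computing s_3 gets *live_p set because its value is used
   (through the loop-closed exit phi) outside the loop.  */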


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is in one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
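
/* Illustrative example (an added sketch, not part of the original file):
   for

        x_1 = a[i_2];           <-- i_2 only indexes the array
        b[i_2] = x_1;           <-- x_1 is the stored value

   this returns false for the use of i_2 in either stmt (so the stmt
   defining i_2 need not become relevant on its account), but true for
   the use of x_1 in the store, since the stored value itself must be
   vectorized.  */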


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "unsupported use of reduction.");

                  VEC_free (gimple, heap, worklist);
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");

                VEC_free (gimple, heap, worklist);
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}


/* Get cost by calling the target's cost builtin.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}


/* Get cost for STMT.  */

int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Function vect_cost_strided_group_size

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}
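
/* Illustrative example (an added sketch, not part of the original file):
   for an interleaved group of four stores

        a[4*i] = w;  a[4*i+1] = x;  a[4*i+2] = y;  a[4*i+3] = z;

   this returns 4 when called on the first store of the group and 1 for
   each of the other three, so the permute overhead for the whole group
   is attributed to the first stmt only.  */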


/* Function vect_model_store_cost

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = DR_GROUP_FIRST_DR (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);

    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
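
/* Worked example for the permute cost above (an added sketch; assumes
   vect_get_stmt_cost (vector_stmt) == 1): with ncopies == 2 and a
   strided group of group_size == 4,

        inside_cost = 2 * exact_log2 (4) * 4 * 1 = 16

   interleave operations, on top of which vect_get_store_cost adds the
   cost of the two vector stores themselves.  */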


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                               vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)

{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                               vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
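
/* Illustrative example (an added sketch, not part of the original file):
   when vectorizing "a[i] = b[i] + 5" with V4SI vectors, the constant
   operand is materialized in the loop preheader roughly as

        cst_.7_12 = { 5, 5, 5, 5 };

   and the returned def (cst_.7_12 here) feeds the vectorized addition
   inside the loop.  */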


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        vec_cst = build_vector_from_val (vector_type, op);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
        vec_oprnd = PHI_RESULT (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (op0, op1, slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
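
/* Illustrative example (an added sketch; which decl, if any, is returned
   depends entirely on the target's builtin_vectorized_function hook):
   for

        for (i = 0; i < n; i++)
          a[i] = __builtin_sqrtf (b[i]);

   a target may return the decl of a V4SF square-root builtin, letting
   the loop issue one vector call per four elements; if the hook returns
   NULL_TREE, the call cannot be vectorized.  */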

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_could_throw_p (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                  build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}


/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
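
/* Illustrative example (an added sketch, not part of the original file):
   widening a V8HI multiply into V4SI results is generated as two such
   halves,

        vect_lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;
        vect_hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;

   with one call to vect_gen_widened_results_half per half, passing
   either the tree codes or the target builtin decls chosen by
   supportable_widening_operation.  */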
1638
1639
b8698a0f
L
1640/* Check if STMT performs a conversion operation, that can be vectorized.
1641 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
1642 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1643 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1644
1645static bool
1646vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1647 gimple *vec_stmt, slp_tree slp_node)
1648{
1649 tree vec_dest;
1650 tree scalar_dest;
1651 tree op0;
1652 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1653 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1654 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1655 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1656 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1657 tree new_temp;
1658 tree def;
1659 gimple def_stmt;
1660 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1661 gimple new_stmt = NULL;
1662 stmt_vec_info prev_stmt_info;
1663 int nunits_in;
1664 int nunits_out;
1665 tree vectype_out, vectype_in;
1666 int ncopies, j;
b690cc0f 1667 tree rhs_type;
ebfd146a
IR
1668 tree builtin_decl;
1669 enum { NARROW, NONE, WIDEN } modifier;
1670 int i;
1671 VEC(tree,heap) *vec_oprnds0 = NULL;
1672 tree vop0;
ebfd146a
IR
1673 VEC(tree,heap) *dummy = NULL;
1674 int dummy_int;
1675
1676 /* Is STMT a vectorizable conversion? */
1677
a70d6342
IR
1678 /* FORNOW: unsupported in basic block SLP. */
1679 gcc_assert (loop_vinfo);
b8698a0f 1680
ebfd146a
IR
1681 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1682 return false;
1683
8644a673 1684 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
1685 return false;
1686
1687 if (!is_gimple_assign (stmt))
1688 return false;
1689
1690 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1691 return false;
1692
1693 code = gimple_assign_rhs_code (stmt);
1694 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1695 return false;
1696
1697 /* Check types of lhs and rhs. */
b690cc0f
RG
1698 scalar_dest = gimple_assign_lhs (stmt);
1699 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1700
ebfd146a
IR
1701 op0 = gimple_assign_rhs1 (stmt);
1702 rhs_type = TREE_TYPE (op0);
b690cc0f
RG
1703 /* Check the operands of the operation. */
1704 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1705 &def_stmt, &def, &dt[0], &vectype_in))
1706 {
1707 if (vect_print_dump_info (REPORT_DETAILS))
1708 fprintf (vect_dump, "use not simple.");
1709 return false;
1710 }
1711 /* If op0 is an external or constant def use a vector type of
1712 the same size as the output vector type. */
ebfd146a 1713 if (!vectype_in)
b690cc0f 1714 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
7d8930a0
IR
1715 if (vec_stmt)
1716 gcc_assert (vectype_in);
1717 if (!vectype_in)
1718 {
1719 if (vect_print_dump_info (REPORT_DETAILS))
1720 {
1721 fprintf (vect_dump, "no vectype for scalar type ");
1722 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1723 }
1724
1725 return false;
1726 }
ebfd146a
IR
1727
1728 /* FORNOW */
b690cc0f
RG
1729 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1730 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
ebfd146a
IR
1731 if (nunits_in == nunits_out / 2)
1732 modifier = NARROW;
1733 else if (nunits_out == nunits_in)
1734 modifier = NONE;
1735 else if (nunits_out == nunits_in / 2)
1736 modifier = WIDEN;
1737 else
1738 return false;
1739
ebfd146a
IR
1740 if (modifier == NARROW)
1741 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1742 else
1743 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1744
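   /* For example (a sketch, assuming 128-bit vector types): FLOAT_EXPR
      from V4SI to V4SF has nunits_in == nunits_out, i.e. NONE;
      FLOAT_EXPR from V4SI to V2DF has nunits_out == nunits_in / 2,
      i.e. WIDEN; FIX_TRUNC_EXPR from V2DF to V4SI has
      nunits_in == nunits_out / 2, i.e. NARROW.  With a vectorization
      factor of 4, both of these WIDEN/NARROW examples get ncopies = 1;
      the WIDEN copy expands into two vector stmts (the two halves),
      while the NARROW copy consumes two V2DF defs.  */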
ff802fa1
IR
1745 /* Multiple types in SLP are handled by creating the appropriate number of
1746 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1747 case of SLP. */
ebfd146a
IR
1748 if (slp_node)
1749 ncopies = 1;
b8698a0f 1750
ebfd146a
IR
1751 /* Sanity check: make sure that at least one copy of the vectorized stmt
1752 needs to be generated. */
1753 gcc_assert (ncopies >= 1);
1754
ebfd146a
IR
1755 /* Supportable by target? */
1756 if ((modifier == NONE
88dd7150 1757 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
ebfd146a 1758 || (modifier == WIDEN
b690cc0f
RG
1759 && !supportable_widening_operation (code, stmt,
1760 vectype_out, vectype_in,
ebfd146a
IR
1761 &decl1, &decl2,
1762 &code1, &code2,
1763 &dummy_int, &dummy))
1764 || (modifier == NARROW
b690cc0f 1765 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
ebfd146a
IR
1766 &code1, &dummy_int, &dummy)))
1767 {
1768 if (vect_print_dump_info (REPORT_DETAILS))
1769 fprintf (vect_dump, "conversion not supported by target.");
1770 return false;
1771 }
1772
1773 if (modifier != NONE)
1774 {
ebfd146a
IR
1775 /* FORNOW: SLP not supported. */
1776 if (STMT_SLP_TYPE (stmt_info))
b8698a0f 1777 return false;
ebfd146a
IR
1778 }
1779
1780 if (!vec_stmt) /* transformation not required. */
1781 {
1782 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1783 return true;
1784 }
1785
1786 /** Transform. **/
1787 if (vect_print_dump_info (REPORT_DETAILS))
1788 fprintf (vect_dump, "transform conversion.");
1789
1790 /* Handle def. */
1791 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1792
1793 if (modifier == NONE && !slp_node)
1794 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1795
1796 prev_stmt_info = NULL;
1797 switch (modifier)
1798 {
1799 case NONE:
1800 for (j = 0; j < ncopies; j++)
1801 {
ebfd146a 1802 if (j == 0)
b8698a0f 1803 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
ebfd146a
IR
1804 else
1805 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1806
1807 builtin_decl =
88dd7150
RG
1808 targetm.vectorize.builtin_conversion (code,
1809 vectype_out, vectype_in);
ac47786e 1810 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
b8698a0f 1811 {
ebfd146a
IR
1812 /* Arguments are ready. Create the new vector stmt. */
1813 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1814 new_temp = make_ssa_name (vec_dest, new_stmt);
1815 gimple_call_set_lhs (new_stmt, new_temp);
1816 vect_finish_stmt_generation (stmt, new_stmt, gsi);
ebfd146a
IR
1817 if (slp_node)
1818 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1819 }
1820
1821 if (j == 0)
1822 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1823 else
1824 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1825 prev_stmt_info = vinfo_for_stmt (new_stmt);
1826 }
1827 break;
1828
1829 case WIDEN:
1830 /* In case the vectorization factor (VF) is bigger than the number
1831 of elements that we can fit in a vectype (nunits), we have to
1832 generate more than one vector stmt - i.e - we need to "unroll"
1833 the vector stmt by a factor VF/nunits. */
1834 for (j = 0; j < ncopies; j++)
1835 {
1836 if (j == 0)
1837 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1838 else
1839 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1840
ebfd146a
IR
1841 /* Generate first half of the widened result: */
1842 new_stmt
b8698a0f 1843 = vect_gen_widened_results_half (code1, decl1,
ebfd146a
IR
1844 vec_oprnd0, vec_oprnd1,
1845 unary_op, vec_dest, gsi, stmt);
1846 if (j == 0)
1847 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1848 else
1849 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1850 prev_stmt_info = vinfo_for_stmt (new_stmt);
1851
1852 /* Generate second half of the widened result: */
1853 new_stmt
1854 = vect_gen_widened_results_half (code2, decl2,
1855 vec_oprnd0, vec_oprnd1,
1856 unary_op, vec_dest, gsi, stmt);
1857 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1858 prev_stmt_info = vinfo_for_stmt (new_stmt);
1859 }
1860 break;
1861
1862 case NARROW:
1863 /* In case the vectorization factor (VF) is bigger than the number
1864 of elements that we can fit in a vectype (nunits), we have to
1865 generate more than one vector stmt - i.e - we need to "unroll"
1866 the vector stmt by a factor VF/nunits. */
1867 for (j = 0; j < ncopies; j++)
1868 {
1869 /* Handle uses. */
1870 if (j == 0)
1871 {
1872 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1873 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1874 }
1875 else
1876 {
1877 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1878 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1879 }
1880
9dc3f7de 1881 /* Arguments are ready. Create the new vector stmt. */
ebfd146a
IR
1882 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1883 vec_oprnd1);
1884 new_temp = make_ssa_name (vec_dest, new_stmt);
1885 gimple_assign_set_lhs (new_stmt, new_temp);
1886 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1887
1888 if (j == 0)
1889 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1890 else
1891 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1892
1893 prev_stmt_info = vinfo_for_stmt (new_stmt);
1894 }
1895
1896 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1897 }
1898
1899 if (vec_oprnds0)
b8698a0f 1900 VEC_free (tree, heap, vec_oprnds0);
ebfd146a
IR
1901
1902 return true;
1903}
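/* As a concrete example of the WIDEN path above (a sketch):

       int a[N];
       double d[N];
       for (i = 0; i < N; i++)
         d[i] = (double) a[i];

   each V4SI def of 'a' is converted by a pair of vector stmts producing
   two V2DF results, chained through STMT_VINFO_RELATED_STMT so that
   later stmts (e.g. the store of 'd') can find both halves.  */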
ff802fa1
IR
1904
1905
ebfd146a
IR
1906/* Function vectorizable_assignment.
1907
b8698a0f
L
1908 Check if STMT performs an assignment (copy) that can be vectorized.
1909 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
1910 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1911 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1912
1913static bool
1914vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1915 gimple *vec_stmt, slp_tree slp_node)
1916{
1917 tree vec_dest;
1918 tree scalar_dest;
1919 tree op;
1920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1921 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1922 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1923 tree new_temp;
1924 tree def;
1925 gimple def_stmt;
1926 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
fde9c428 1927 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 1928 int ncopies;
f18b55bd 1929 int i, j;
ebfd146a
IR
1930 VEC(tree,heap) *vec_oprnds = NULL;
1931 tree vop;
a70d6342 1932 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f18b55bd
IR
1933 gimple new_stmt = NULL;
1934 stmt_vec_info prev_stmt_info = NULL;
fde9c428
RG
1935 enum tree_code code;
1936 tree vectype_in;
ebfd146a
IR
1937
1938 /* Multiple types in SLP are handled by creating the appropriate number of
1939 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1940 case of SLP. */
1941 if (slp_node)
1942 ncopies = 1;
1943 else
1944 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1945
1946 gcc_assert (ncopies >= 1);
ebfd146a 1947
a70d6342 1948 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
1949 return false;
1950
8644a673 1951 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
1952 return false;
1953
1954 /* Is vectorizable assignment? */
1955 if (!is_gimple_assign (stmt))
1956 return false;
1957
1958 scalar_dest = gimple_assign_lhs (stmt);
1959 if (TREE_CODE (scalar_dest) != SSA_NAME)
1960 return false;
1961
fde9c428 1962 code = gimple_assign_rhs_code (stmt);
ebfd146a 1963 if (gimple_assign_single_p (stmt)
fde9c428
RG
1964 || code == PAREN_EXPR
1965 || CONVERT_EXPR_CODE_P (code))
ebfd146a
IR
1966 op = gimple_assign_rhs1 (stmt);
1967 else
1968 return false;
1969
fde9c428
RG
1970 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
1971 &def_stmt, &def, &dt[0], &vectype_in))
ebfd146a
IR
1972 {
1973 if (vect_print_dump_info (REPORT_DETAILS))
1974 fprintf (vect_dump, "use not simple.");
1975 return false;
1976 }
1977
fde9c428
RG
1978 /* We can handle NOP_EXPR conversions that do not change the number
1979 of elements or the vector size. */
1980 if (CONVERT_EXPR_CODE_P (code)
1981 && (!vectype_in
1982 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
1983 || (GET_MODE_SIZE (TYPE_MODE (vectype))
1984 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
1985 return false;
1986
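   /* For example (a sketch): with 'int i' and 'unsigned int u', the
      conversion

          u = (unsigned int) i;

      changes neither the number of elements nor the vector size, so it
      is vectorizable here as a plain copy; the transform below just
      wraps the operand in a VIEW_CONVERT_EXPR to the destination
      vector type.  A conversion that changes the element size, e.g.
      int -> short, is rejected here and left to the type
      demotion/promotion code instead.  */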
ebfd146a
IR
1987 if (!vec_stmt) /* transformation not required. */
1988 {
1989 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1990 if (vect_print_dump_info (REPORT_DETAILS))
1991 fprintf (vect_dump, "=== vectorizable_assignment ===");
1992 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1993 return true;
1994 }
1995
1996 /** Transform. **/
1997 if (vect_print_dump_info (REPORT_DETAILS))
1998 fprintf (vect_dump, "transform assignment.");
1999
2000 /* Handle def. */
2001 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2002
2003 /* Handle use. */
f18b55bd 2004 for (j = 0; j < ncopies; j++)
ebfd146a 2005 {
f18b55bd
IR
2006 /* Handle uses. */
2007 if (j == 0)
2008 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2009 else
2010 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2011
2012 /* Arguments are ready. Create the new vector stmt. */
ac47786e 2013 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
f18b55bd 2014 {
fde9c428 2015 if (CONVERT_EXPR_CODE_P (code))
4a73490d 2016 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
f18b55bd
IR
2017 new_stmt = gimple_build_assign (vec_dest, vop);
2018 new_temp = make_ssa_name (vec_dest, new_stmt);
2019 gimple_assign_set_lhs (new_stmt, new_temp);
2020 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2021 if (slp_node)
2022 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2023 }
ebfd146a
IR
2024
2025 if (slp_node)
f18b55bd
IR
2026 continue;
2027
2028 if (j == 0)
2029 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2030 else
2031 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2032
2033 prev_stmt_info = vinfo_for_stmt (new_stmt);
2034 }
b8698a0f
L
2035
2036 VEC_free (tree, heap, vec_oprnds);
ebfd146a
IR
2037 return true;
2038}
2039
9dc3f7de
IR
2040
2041/* Function vectorizable_shift.
2042
2043 Check if STMT performs a shift operation that can be vectorized.
2044 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2045 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2046 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2047
2048static bool
2049vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2050 gimple *vec_stmt, slp_tree slp_node)
2051{
2052 tree vec_dest;
2053 tree scalar_dest;
2054 tree op0, op1 = NULL;
2055 tree vec_oprnd1 = NULL_TREE;
2056 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2057 tree vectype;
2058 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2059 enum tree_code code;
2060 enum machine_mode vec_mode;
2061 tree new_temp;
2062 optab optab;
2063 int icode;
2064 enum machine_mode optab_op2_mode;
2065 tree def;
2066 gimple def_stmt;
2067 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2068 gimple new_stmt = NULL;
2069 stmt_vec_info prev_stmt_info;
2070 int nunits_in;
2071 int nunits_out;
2072 tree vectype_out;
2073 int ncopies;
2074 int j, i;
2075 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2076 tree vop0, vop1;
2077 unsigned int k;
2078 bool scalar_shift_arg = false;
2079 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2080 int vf;
2081
2082 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2083 return false;
2084
2085 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2086 return false;
2087
2088 /* Is STMT a vectorizable binary/unary operation? */
2089 if (!is_gimple_assign (stmt))
2090 return false;
2091
2092 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2093 return false;
2094
2095 code = gimple_assign_rhs_code (stmt);
2096
2097 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2098 || code == RROTATE_EXPR))
2099 return false;
2100
2101 scalar_dest = gimple_assign_lhs (stmt);
2102 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2103
2104 op0 = gimple_assign_rhs1 (stmt);
2105 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2106 &def_stmt, &def, &dt[0], &vectype))
2107 {
2108 if (vect_print_dump_info (REPORT_DETAILS))
2109 fprintf (vect_dump, "use not simple.");
2110 return false;
2111 }
2112 /* If op0 is an external or constant def use a vector type with
2113 the same size as the output vector type. */
2114 if (!vectype)
2115 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2116 if (vec_stmt)
2117 gcc_assert (vectype);
2118 if (!vectype)
2119 {
2120 if (vect_print_dump_info (REPORT_DETAILS))
2121 {
2122 fprintf (vect_dump, "no vectype for scalar type ");
2123 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2124 }
2125
2126 return false;
2127 }
2128
2129 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2130 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2131 if (nunits_out != nunits_in)
2132 return false;
2133
2134 op1 = gimple_assign_rhs2 (stmt);
2135 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[1]))
2136 {
2137 if (vect_print_dump_info (REPORT_DETAILS))
2138 fprintf (vect_dump, "use not simple.");
2139 return false;
2140 }
2141
2142 if (loop_vinfo)
2143 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2144 else
2145 vf = 1;
2146
2147 /* Multiple types in SLP are handled by creating the appropriate number of
2148 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2149 case of SLP. */
2150 if (slp_node)
2151 ncopies = 1;
2152 else
2153 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2154
2155 gcc_assert (ncopies >= 1);
2156
2157 /* Determine whether the shift amount is a vector, or scalar. If the
2158 shift/rotate amount is a vector, use the vector/vector shift optabs. */
2159
2160 /* Vector shifted by vector. */
2161 if (dt[1] == vect_internal_def)
2162 {
2163 optab = optab_for_tree_code (code, vectype, optab_vector);
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 fprintf (vect_dump, "vector/vector shift/rotate found.");
2166 }
2167 /* See if the machine has a vector shifted by scalar insn and if not
2168 then see if it has a vector shifted by vector insn. */
2169 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2170 {
2171 optab = optab_for_tree_code (code, vectype, optab_scalar);
2172 if (optab
2173 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2174 {
2175 scalar_shift_arg = true;
2176 if (vect_print_dump_info (REPORT_DETAILS))
2177 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2178 }
2179 else
2180 {
2181 optab = optab_for_tree_code (code, vectype, optab_vector);
2182 if (optab
2183 && (optab_handler (optab, TYPE_MODE (vectype))
2184 != CODE_FOR_nothing))
2185 {
2186 if (vect_print_dump_info (REPORT_DETAILS))
2187 fprintf (vect_dump, "vector/vector shift/rotate found.");
2188
2189 /* Unlike the other binary operators, shifts/rotates have
2190 the rhs being int, instead of the same type as the lhs,
2191 so make sure the scalar is the right type if we are
2192 dealing with vectors of short/char. */
2193 if (dt[1] == vect_constant_def)
2194 op1 = fold_convert (TREE_TYPE (vectype), op1);
2195 }
2196 }
2197 }
2198 else
2199 {
2200 if (vect_print_dump_info (REPORT_DETAILS))
2201 fprintf (vect_dump, "operand mode requires invariant argument.");
2202 return false;
2203 }
2204
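   /* For example (a sketch):

          a[i] = b[i] << 3;       the shift amount is invariant - the
                                  vector/scalar optab can be used and
                                  OP1 stays a scalar;
          a[i] = b[i] << c[i];    the shift amount varies per element -
                                  the vector/vector optab is needed.

      If only the vector/vector form exists for an invariant amount, the
      constant is converted to the vector element type here and later
      broadcast into a vector def.  */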
2205 /* Supportable by target? */
2206 if (!optab)
2207 {
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "no optab.");
2210 return false;
2211 }
2212 vec_mode = TYPE_MODE (vectype);
2213 icode = (int) optab_handler (optab, vec_mode);
2214 if (icode == CODE_FOR_nothing)
2215 {
2216 if (vect_print_dump_info (REPORT_DETAILS))
2217 fprintf (vect_dump, "op not supported by target.");
2218 /* Check only during analysis. */
2219 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2220 || (vf < vect_min_worthwhile_factor (code)
2221 && !vec_stmt))
2222 return false;
2223 if (vect_print_dump_info (REPORT_DETAILS))
2224 fprintf (vect_dump, "proceeding using word mode.");
2225 }
2226
2227 /* Worthwhile without SIMD support? Check only during analysis. */
2228 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2229 && vf < vect_min_worthwhile_factor (code)
2230 && !vec_stmt)
2231 {
2232 if (vect_print_dump_info (REPORT_DETAILS))
2233 fprintf (vect_dump, "not worthwhile without SIMD support.");
2234 return false;
2235 }
2236
2237 if (!vec_stmt) /* transformation not required. */
2238 {
2239 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
2240 if (vect_print_dump_info (REPORT_DETAILS))
2241 fprintf (vect_dump, "=== vectorizable_shift ===");
2242 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2243 return true;
2244 }
2245
2246 /** Transform. **/
2247
2248 if (vect_print_dump_info (REPORT_DETAILS))
2249 fprintf (vect_dump, "transform binary/unary operation.");
2250
2251 /* Handle def. */
2252 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2253
2254 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2255 created in the previous stages of the recursion, so no allocation is
2256 needed, except for the case of shift with scalar shift argument. In that
2257 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2258 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2259 In case of loop-based vectorization we allocate VECs of size 1. We
2260 allocate VEC_OPRNDS1 only in case of binary operation. */
2261 if (!slp_node)
2262 {
2263 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2264 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2265 }
2266 else if (scalar_shift_arg)
2267 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2268
2269 prev_stmt_info = NULL;
2270 for (j = 0; j < ncopies; j++)
2271 {
2272 /* Handle uses. */
2273 if (j == 0)
2274 {
2275 if (scalar_shift_arg)
2276 {
2277 /* Vector shl and shr insn patterns can be defined with scalar
2278 operand 2 (shift operand). In this case, use constant or loop
2279 invariant op1 directly, without extending it to vector mode
2280 first. */
2281 optab_op2_mode = insn_data[icode].operand[2].mode;
2282 if (!VECTOR_MODE_P (optab_op2_mode))
2283 {
2284 if (vect_print_dump_info (REPORT_DETAILS))
2285 fprintf (vect_dump, "operand 1 using scalar mode.");
2286 vec_oprnd1 = op1;
2287 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2288 if (slp_node)
2289 {
2290 /* Store vec_oprnd1 for every vector stmt to be created
2291 for SLP_NODE. We check during the analysis that all
2292 the shift arguments are the same.
2293 TODO: Allow different constants for different vector
2294 stmts generated for an SLP instance. */
2295 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2296 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2297 }
2298 }
2299 }
2300
2301 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2302 (a special case for certain kind of vector shifts); otherwise,
2303 operand 1 should be of a vector type (the usual case). */
2304 if (vec_oprnd1)
2305 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2306 slp_node);
2307 else
2308 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2309 slp_node);
2310 }
2311 else
2312 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2313
2314 /* Arguments are ready. Create the new vector stmt. */
2315 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2316 {
2317 vop1 = VEC_index (tree, vec_oprnds1, i);
2318 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2319 new_temp = make_ssa_name (vec_dest, new_stmt);
2320 gimple_assign_set_lhs (new_stmt, new_temp);
2321 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2322 if (slp_node)
2323 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2324 }
2325
2326 if (slp_node)
2327 continue;
2328
2329 if (j == 0)
2330 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2331 else
2332 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2333 prev_stmt_info = vinfo_for_stmt (new_stmt);
2334 }
2335
2336 VEC_free (tree, heap, vec_oprnds0);
2337 VEC_free (tree, heap, vec_oprnds1);
2338
2339 return true;
2340}
2341
2342
ebfd146a
IR
2343/* Function vectorizable_operation.
2344
16949072
RG
2345 Check if STMT performs a binary, unary or ternary operation that can
2346 be vectorized.
b8698a0f 2347 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
2348 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2349 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2350
2351static bool
2352vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
2353 gimple *vec_stmt, slp_tree slp_node)
2354{
2355 tree vec_dest;
2356 tree scalar_dest;
16949072 2357 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
ebfd146a 2358 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b690cc0f 2359 tree vectype;
ebfd146a
IR
2360 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2361 enum tree_code code;
2362 enum machine_mode vec_mode;
2363 tree new_temp;
2364 int op_type;
2365 optab optab;
2366 int icode;
ebfd146a
IR
2367 tree def;
2368 gimple def_stmt;
16949072
RG
2369 enum vect_def_type dt[3]
2370 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
ebfd146a
IR
2371 gimple new_stmt = NULL;
2372 stmt_vec_info prev_stmt_info;
b690cc0f 2373 int nunits_in;
ebfd146a
IR
2374 int nunits_out;
2375 tree vectype_out;
2376 int ncopies;
2377 int j, i;
16949072
RG
2378 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
2379 tree vop0, vop1, vop2;
a70d6342
IR
2380 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2381 int vf;
2382
a70d6342 2383 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
ebfd146a
IR
2384 return false;
2385
8644a673 2386 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2387 return false;
2388
2389 /* Is STMT a vectorizable binary/unary operation? */
2390 if (!is_gimple_assign (stmt))
2391 return false;
2392
2393 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2394 return false;
2395
ebfd146a
IR
2396 code = gimple_assign_rhs_code (stmt);
2397
2398 /* For pointer addition, we should use the normal plus for
2399 the vector addition. */
2400 if (code == POINTER_PLUS_EXPR)
2401 code = PLUS_EXPR;
2402
2403 /* Support only unary or binary operations. */
2404 op_type = TREE_CODE_LENGTH (code);
16949072 2405 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
ebfd146a
IR
2406 {
2407 if (vect_print_dump_info (REPORT_DETAILS))
16949072
RG
2408 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
2409 op_type);
ebfd146a
IR
2410 return false;
2411 }
2412
b690cc0f
RG
2413 scalar_dest = gimple_assign_lhs (stmt);
2414 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2415
ebfd146a 2416 op0 = gimple_assign_rhs1 (stmt);
b690cc0f
RG
2417 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2418 &def_stmt, &def, &dt[0], &vectype))
ebfd146a
IR
2419 {
2420 if (vect_print_dump_info (REPORT_DETAILS))
2421 fprintf (vect_dump, "use not simple.");
2422 return false;
2423 }
b690cc0f
RG
2424 /* If op0 is an external or constant def use a vector type with
2425 the same size as the output vector type. */
2426 if (!vectype)
2427 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
2428 if (vec_stmt)
2429 gcc_assert (vectype);
2430 if (!vectype)
2431 {
2432 if (vect_print_dump_info (REPORT_DETAILS))
2433 {
2434 fprintf (vect_dump, "no vectype for scalar type ");
2435 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2436 }
2437
2438 return false;
2439 }
b690cc0f
RG
2440
2441 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2442 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2443 if (nunits_out != nunits_in)
2444 return false;
ebfd146a 2445
16949072 2446 if (op_type == binary_op || op_type == ternary_op)
ebfd146a
IR
2447 {
2448 op1 = gimple_assign_rhs2 (stmt);
b8698a0f 2449 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
a70d6342 2450 &dt[1]))
ebfd146a
IR
2451 {
2452 if (vect_print_dump_info (REPORT_DETAILS))
2453 fprintf (vect_dump, "use not simple.");
2454 return false;
2455 }
2456 }
16949072
RG
2457 if (op_type == ternary_op)
2458 {
2459 op2 = gimple_assign_rhs3 (stmt);
2460 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
2461 &dt[2]))
2462 {
2463 if (vect_print_dump_info (REPORT_DETAILS))
2464 fprintf (vect_dump, "use not simple.");
2465 return false;
2466 }
2467 }
ebfd146a 2468
b690cc0f
RG
2469 if (loop_vinfo)
2470 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2471 else
2472 vf = 1;
2473
2474 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 2475 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
b690cc0f
RG
2476 case of SLP. */
2477 if (slp_node)
2478 ncopies = 1;
2479 else
2480 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2481
2482 gcc_assert (ncopies >= 1);
2483
9dc3f7de 2484 /* Shifts are handled in vectorizable_shift (). */
ebfd146a
IR
2485 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2486 || code == RROTATE_EXPR)
9dc3f7de 2487 return false;
ebfd146a 2488
16949072 2489 optab = optab_for_tree_code (code, vectype, optab_default);
ebfd146a
IR
2490
2491 /* Supportable by target? */
2492 if (!optab)
2493 {
2494 if (vect_print_dump_info (REPORT_DETAILS))
2495 fprintf (vect_dump, "no optab.");
2496 return false;
2497 }
2498 vec_mode = TYPE_MODE (vectype);
947131ba 2499 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
2500 if (icode == CODE_FOR_nothing)
2501 {
2502 if (vect_print_dump_info (REPORT_DETAILS))
2503 fprintf (vect_dump, "op not supported by target.");
2504 /* Check only during analysis. */
2505 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
a70d6342 2506 || (vf < vect_min_worthwhile_factor (code)
ebfd146a
IR
2507 && !vec_stmt))
2508 return false;
2509 if (vect_print_dump_info (REPORT_DETAILS))
2510 fprintf (vect_dump, "proceeding using word mode.");
2511 }
2512
ff802fa1 2513 /* Worthwhile without SIMD support? Check only during analysis. */
ebfd146a 2514 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
a70d6342 2515 && vf < vect_min_worthwhile_factor (code)
ebfd146a
IR
2516 && !vec_stmt)
2517 {
2518 if (vect_print_dump_info (REPORT_DETAILS))
2519 fprintf (vect_dump, "not worthwhile without SIMD support.");
2520 return false;
2521 }
2522
2523 if (!vec_stmt) /* transformation not required. */
2524 {
2525 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2526 if (vect_print_dump_info (REPORT_DETAILS))
2527 fprintf (vect_dump, "=== vectorizable_operation ===");
2528 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2529 return true;
2530 }
2531
2532 /** Transform. **/
2533
2534 if (vect_print_dump_info (REPORT_DETAILS))
2535 fprintf (vect_dump, "transform binary/unary operation.");
2536
2537 /* Handle def. */
2538 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2539
ff802fa1 2540 /* Allocate VECs for vector operands. In case of SLP, vector operands are
ebfd146a 2541 created in the previous stages of the recursion, so no allocation is
ff802fa1 2542 needed, except for the case of shift with scalar shift argument. In that
ebfd146a
IR
2543 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2544 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
ff802fa1 2545 In case of loop-based vectorization we allocate VECs of size 1. We
b8698a0f 2546 allocate VEC_OPRNDS1 only in case of a binary or ternary operation. */
ebfd146a
IR
2547 if (!slp_node)
2548 {
2549 vec_oprnds0 = VEC_alloc (tree, heap, 1);
16949072 2550 if (op_type == binary_op || op_type == ternary_op)
ebfd146a 2551 vec_oprnds1 = VEC_alloc (tree, heap, 1);
16949072
RG
2552 if (op_type == ternary_op)
2553 vec_oprnds2 = VEC_alloc (tree, heap, 1);
ebfd146a 2554 }
ebfd146a
IR
2555
2556 /* In case the vectorization factor (VF) is bigger than the number
2557 of elements that we can fit in a vectype (nunits), we have to generate
2558 more than one vector stmt - i.e - we need to "unroll" the
ff802fa1 2559 vector stmt by a factor VF/nunits. In doing so, we record a pointer
ebfd146a 2560 from one copy of the vector stmt to the next, in the field
ff802fa1 2561 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
ebfd146a 2562 stages to find the correct vector defs to be used when vectorizing
ff802fa1
IR
2563 stmts that use the defs of the current stmt. The example below
2564 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
2565 we need to create 4 vectorized stmts):
ebfd146a
IR
2566
2567 before vectorization:
2568 RELATED_STMT VEC_STMT
2569 S1: x = memref - -
2570 S2: z = x + 1 - -
2571
2572 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2573 there):
2574 RELATED_STMT VEC_STMT
2575 VS1_0: vx0 = memref0 VS1_1 -
2576 VS1_1: vx1 = memref1 VS1_2 -
2577 VS1_2: vx2 = memref2 VS1_3 -
2578 VS1_3: vx3 = memref3 - -
2579 S1: x = load - VS1_0
2580 S2: z = x + 1 - -
2581
2582 step2: vectorize stmt S2 (done here):
2583 To vectorize stmt S2 we first need to find the relevant vector
ff802fa1 2584 def for the first operand 'x'. This is, as usual, obtained from
ebfd146a 2585 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
ff802fa1
IR
2586 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2587 relevant vector def 'vx0'. Having found 'vx0' we can generate
ebfd146a
IR
2588 the vector stmt VS2_0, and as usual, record it in the
2589 STMT_VINFO_VEC_STMT of stmt S2.
2590 When creating the second copy (VS2_1), we obtain the relevant vector
2591 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
ff802fa1
IR
2592 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2593 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
ebfd146a 2594 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
ff802fa1 2595 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
ebfd146a
IR
2596 chain of stmts and pointers:
2597 RELATED_STMT VEC_STMT
2598 VS1_0: vx0 = memref0 VS1_1 -
2599 VS1_1: vx1 = memref1 VS1_2 -
2600 VS1_2: vx2 = memref2 VS1_3 -
2601 VS1_3: vx3 = memref3 - -
2602 S1: x = load - VS1_0
2603 VS2_0: vz0 = vx0 + v1 VS2_1 -
2604 VS2_1: vz1 = vx1 + v1 VS2_2 -
2605 VS2_2: vz2 = vx2 + v1 VS2_3 -
2606 VS2_3: vz3 = vx3 + v1 - -
2607 S2: z = x + 1 - VS2_0 */
2608
2609 prev_stmt_info = NULL;
2610 for (j = 0; j < ncopies; j++)
2611 {
2612 /* Handle uses. */
2613 if (j == 0)
2614 {
16949072 2615 if (op_type == binary_op || op_type == ternary_op)
b8698a0f 2616 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
ebfd146a
IR
2617 slp_node);
2618 else
b8698a0f 2619 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
ebfd146a 2620 slp_node);
16949072
RG
2621 if (op_type == ternary_op)
2622 {
2623 vec_oprnds2 = VEC_alloc (tree, heap, 1);
2624 VEC_quick_push (tree, vec_oprnds2,
2625 vect_get_vec_def_for_operand (op2, stmt, NULL));
2626 }
ebfd146a
IR
2627 }
2628 else
16949072
RG
2629 {
2630 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2631 if (op_type == ternary_op)
2632 {
2633 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
2634 VEC_quick_push (tree, vec_oprnds2,
2635 vect_get_vec_def_for_stmt_copy (dt[2],
2636 vec_oprnd));
2637 }
2638 }
ebfd146a 2639
9dc3f7de 2640 /* Arguments are ready. Create the new vector stmt. */
ac47786e 2641 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
ebfd146a 2642 {
16949072
RG
2643 vop1 = ((op_type == binary_op || op_type == ternary_op)
2644 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
2645 vop2 = ((op_type == ternary_op)
2646 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
2647 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
2648 vop0, vop1, vop2);
ebfd146a
IR
2649 new_temp = make_ssa_name (vec_dest, new_stmt);
2650 gimple_assign_set_lhs (new_stmt, new_temp);
2651 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2652 if (slp_node)
2653 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2654 }
2655
2656 if (slp_node)
2657 continue;
2658
2659 if (j == 0)
2660 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2661 else
2662 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2663 prev_stmt_info = vinfo_for_stmt (new_stmt);
2664 }
2665
2666 VEC_free (tree, heap, vec_oprnds0);
2667 if (vec_oprnds1)
2668 VEC_free (tree, heap, vec_oprnds1);
16949072
RG
2669 if (vec_oprnds2)
2670 VEC_free (tree, heap, vec_oprnds2);
ebfd146a
IR
2671
2672 return true;
2673}
2674
2675
ff802fa1 2676/* Get vectorized definitions for loop-based vectorization. For the first
b8698a0f
L
2677 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2678 scalar operand), and for the rest we get a copy with
ebfd146a
IR
2679 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2680 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2681 The vectors are collected into VEC_OPRNDS. */
2682
2683static void
b8698a0f 2684vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
ebfd146a
IR
2685 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2686{
2687 tree vec_oprnd;
2688
2689 /* Get first vector operand. */
2690 /* All the vector operands except the very first one (that is scalar oprnd)
2691 are stmt copies. */
b8698a0f 2692 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
ebfd146a
IR
2693 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2694 else
2695 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2696
2697 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2698
2699 /* Get second vector operand. */
2700 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2701 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
b8698a0f 2702
ebfd146a
IR
2703 *oprnd = vec_oprnd;
2704
b8698a0f 2705 /* For conversion in multiple steps, continue to get operands
ebfd146a
IR
2706 recursively. */
2707 if (multi_step_cvt)
b8698a0f 2708 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
ebfd146a
IR
2709}
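/* For example (a sketch): vectorizable_type_demotion passes
   MULTI_STEP_CVT == vect_pow2 (multi_step_cvt) - 1, so for a
   single-step narrowing (multi_step_cvt == 0) this collects the two
   input vectors that one narrowing stmt (e.g. a VEC_PACK_TRUNC_EXPR)
   consumes, and for a two-step narrowing (multi_step_cvt == 1) it
   collects four.  */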
2710
2711
2712/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
b8698a0f 2713 For multi-step conversions store the resulting vectors and call the function
ebfd146a
IR
2714 recursively. */
2715
2716static void
2717vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2718 int multi_step_cvt, gimple stmt,
2719 VEC (tree, heap) *vec_dsts,
2720 gimple_stmt_iterator *gsi,
2721 slp_tree slp_node, enum tree_code code,
2722 stmt_vec_info *prev_stmt_info)
2723{
2724 unsigned int i;
2725 tree vop0, vop1, new_tmp, vec_dest;
2726 gimple new_stmt;
2727 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2728
b8698a0f 2729 vec_dest = VEC_pop (tree, vec_dsts);
ebfd146a
IR
2730
2731 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2732 {
2733 /* Create demotion operation. */
2734 vop0 = VEC_index (tree, *vec_oprnds, i);
2735 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2736 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2737 new_tmp = make_ssa_name (vec_dest, new_stmt);
2738 gimple_assign_set_lhs (new_stmt, new_tmp);
2739 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2740
2741 if (multi_step_cvt)
2742 /* Store the resulting vector for next recursive call. */
b8698a0f 2743 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
ebfd146a
IR
2744 else
2745 {
b8698a0f 2746 /* This is the last step of the conversion sequence. Store the
ebfd146a
IR
2747 vectors in SLP_NODE or in vector info of the scalar statement
2748 (or in STMT_VINFO_RELATED_STMT chain). */
2749 if (slp_node)
2750 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2751 else
2752 {
2753 if (!*prev_stmt_info)
2754 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2755 else
2756 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2757
2758 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2759 }
2760 }
2761 }
2762
2763 /* For multi-step demotion operations we first generate demotion operations
b8698a0f 2764 from the source type to the intermediate types, and then combine the
ebfd146a
IR
2765 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2766 type. */
2767 if (multi_step_cvt)
2768 {
2769 /* At each level of recursion we have half of the operands we had at the
2770 previous level. */
2771 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
b8698a0f 2772 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
ebfd146a
IR
2773 stmt, vec_dsts, gsi, slp_node,
2774 code, prev_stmt_info);
2775 }
2776}
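/* For example (a sketch, assuming 128-bit vector types): an int -> char
   demotion is a two-step conversion (multi_step_cvt == 1).  One
   invocation packs four V4SI operands pairwise into two V8HI vectors,
   and the recursive call packs those into the final V16QI result:

       vh0 = VEC_PACK_TRUNC_EXPR <vi0, vi1>;
       vh1 = VEC_PACK_TRUNC_EXPR <vi2, vi3>;
       vq0 = VEC_PACK_TRUNC_EXPR <vh0, vh1>;  */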
2777
2778
2779/* Function vectorizable_type_demotion
2780
2781 Check if STMT performs a binary or unary operation that involves
2782 type demotion, and if it can be vectorized.
2783 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2784 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2785 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2786
2787static bool
2788vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2789 gimple *vec_stmt, slp_tree slp_node)
2790{
2791 tree vec_dest;
2792 tree scalar_dest;
2793 tree op0;
2794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2795 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2796 enum tree_code code, code1 = ERROR_MARK;
2797 tree def;
2798 gimple def_stmt;
2799 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2800 stmt_vec_info prev_stmt_info;
2801 int nunits_in;
2802 int nunits_out;
2803 tree vectype_out;
2804 int ncopies;
2805 int j, i;
2806 tree vectype_in;
2807 int multi_step_cvt = 0;
2808 VEC (tree, heap) *vec_oprnds0 = NULL;
2809 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2810 tree last_oprnd, intermediate_type;
2811
a70d6342
IR
2812 /* FORNOW: not supported by basic block SLP vectorization. */
2813 gcc_assert (loop_vinfo);
2814
ebfd146a
IR
2815 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2816 return false;
2817
8644a673 2818 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
2819 return false;
2820
2821 /* Is STMT a vectorizable type-demotion operation? */
2822 if (!is_gimple_assign (stmt))
2823 return false;
2824
2825 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2826 return false;
2827
2828 code = gimple_assign_rhs_code (stmt);
2829 if (!CONVERT_EXPR_CODE_P (code))
2830 return false;
2831
b690cc0f
RG
2832 scalar_dest = gimple_assign_lhs (stmt);
2833 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2834
2835 /* Check the operands of the operation. */
ebfd146a 2836 op0 = gimple_assign_rhs1 (stmt);
b690cc0f
RG
2837 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2838 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2839 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2840 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2841 && CONVERT_EXPR_CODE_P (code))))
2842 return false;
2843 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
2844 &def_stmt, &def, &dt[0], &vectype_in))
2845 {
2846 if (vect_print_dump_info (REPORT_DETAILS))
2847 fprintf (vect_dump, "use not simple.");
2848 return false;
2849 }
2850 /* If op0 is an external def use a vector type with the
2851 same size as the output vector type if possible. */
2852 if (!vectype_in)
2853 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
2854 if (vec_stmt)
2855 gcc_assert (vectype_in);
ebfd146a 2856 if (!vectype_in)
7d8930a0
IR
2857 {
2858 if (vect_print_dump_info (REPORT_DETAILS))
2859 {
2860 fprintf (vect_dump, "no vectype for scalar type ");
2861 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2862 }
2863
2864 return false;
2865 }
ebfd146a 2866
b690cc0f 2867 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
ebfd146a
IR
2868 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2869 if (nunits_in >= nunits_out)
2870 return false;
2871
2872 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 2873 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a
IR
2874 case of SLP. */
2875 if (slp_node)
2876 ncopies = 1;
2877 else
2878 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
ebfd146a
IR
2879 gcc_assert (ncopies >= 1);
2880
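   /* For example (a sketch, assuming 128-bit vector types): for an
      int -> short loop with vectype_in V4SI and vectype_out V8HI, a
      vectorization factor of 8 gives ncopies = 8 / nunits_out = 1;
      that single copy still reads two V4SI defs, which
      vect_get_loop_based_defs collects below.  */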
ebfd146a 2881 /* Supportable by target? */
b690cc0f
RG
2882 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
2883 &code1, &multi_step_cvt, &interm_types))
ebfd146a
IR
2884 return false;
2885
ebfd146a
IR
2886 if (!vec_stmt) /* transformation not required. */
2887 {
2888 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2889 if (vect_print_dump_info (REPORT_DETAILS))
2890 fprintf (vect_dump, "=== vectorizable_demotion ===");
2891 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2892 return true;
2893 }
2894
2895 /** Transform. **/
2896 if (vect_print_dump_info (REPORT_DETAILS))
2897 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2898 ncopies);
2899
b8698a0f
L
2900 /* In case of multi-step demotion, we first generate demotion operations to
2901 the intermediate types, and then from those types to the final one.
ebfd146a 2902 We create vector destinations for the intermediate type (TYPES) received
b8698a0f 2903 from supportable_narrowing_operation, and store them in the correct order
ebfd146a
IR
2904 for future use in vect_create_vectorized_demotion_stmts(). */
2905 if (multi_step_cvt)
2906 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2907 else
2908 vec_dsts = VEC_alloc (tree, heap, 1);
b8698a0f 2909
ebfd146a
IR
2910 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2911 VEC_quick_push (tree, vec_dsts, vec_dest);
2912
2913 if (multi_step_cvt)
2914 {
b8698a0f 2915 for (i = VEC_length (tree, interm_types) - 1;
ebfd146a
IR
2916 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2917 {
b8698a0f 2918 vec_dest = vect_create_destination_var (scalar_dest,
ebfd146a
IR
2919 intermediate_type);
2920 VEC_quick_push (tree, vec_dsts, vec_dest);
2921 }
2922 }
2923
2924 /* In case the vectorization factor (VF) is bigger than the number
2925 of elements that we can fit in a vectype (nunits), we have to generate
2926 more than one vector stmt - i.e - we need to "unroll" the
2927 vector stmt by a factor VF/nunits. */
2928 last_oprnd = op0;
2929 prev_stmt_info = NULL;
2930 for (j = 0; j < ncopies; j++)
2931 {
2932 /* Handle uses. */
2933 if (slp_node)
9dc3f7de 2934 vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL, -1);
ebfd146a
IR
2935 else
2936 {
2937 VEC_free (tree, heap, vec_oprnds0);
2938 vec_oprnds0 = VEC_alloc (tree, heap,
2939 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
b8698a0f 2940 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
ebfd146a
IR
2941 vect_pow2 (multi_step_cvt) - 1);
2942 }
2943
9dc3f7de 2944 /* Arguments are ready. Create the new vector stmts. */
ebfd146a 2945 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
b8698a0f 2946 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
ebfd146a 2947 multi_step_cvt, stmt, tmp_vec_dsts,
b8698a0f 2948 gsi, slp_node, code1,
ebfd146a
IR
2949 &prev_stmt_info);
2950 }
2951
2952 VEC_free (tree, heap, vec_oprnds0);
2953 VEC_free (tree, heap, vec_dsts);
2954 VEC_free (tree, heap, tmp_vec_dsts);
2955 VEC_free (tree, heap, interm_types);
2956
2957 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2958 return true;
2959}
2960
2961
2962/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
ff802fa1 2963 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
ebfd146a
IR
2964 the resulting vectors and call the function recursively. */
2965
2966static void
2967vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2968 VEC (tree, heap) **vec_oprnds1,
2969 int multi_step_cvt, gimple stmt,
2970 VEC (tree, heap) *vec_dsts,
2971 gimple_stmt_iterator *gsi,
2972 slp_tree slp_node, enum tree_code code1,
b8698a0f 2973 enum tree_code code2, tree decl1,
ebfd146a
IR
2974 tree decl2, int op_type,
2975 stmt_vec_info *prev_stmt_info)
2976{
2977 int i;
2978 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2979 gimple new_stmt1, new_stmt2;
2980 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2981 VEC (tree, heap) *vec_tmp;
2982
2983 vec_dest = VEC_pop (tree, vec_dsts);
2984 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2985
ac47786e 2986 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
ebfd146a
IR
2987 {
2988 if (op_type == binary_op)
2989 vop1 = VEC_index (tree, *vec_oprnds1, i);
2990 else
2991 vop1 = NULL_TREE;
2992
2993 /* Generate the two halves of promotion operation. */
b8698a0f 2994 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
ebfd146a
IR
2995 op_type, vec_dest, gsi, stmt);
2996 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2997 op_type, vec_dest, gsi, stmt);
2998 if (is_gimple_call (new_stmt1))
2999 {
3000 new_tmp1 = gimple_call_lhs (new_stmt1);
3001 new_tmp2 = gimple_call_lhs (new_stmt2);
3002 }
3003 else
3004 {
3005 new_tmp1 = gimple_assign_lhs (new_stmt1);
3006 new_tmp2 = gimple_assign_lhs (new_stmt2);
3007 }
3008
3009 if (multi_step_cvt)
3010 {
3011 /* Store the results for the recursive call. */
3012 VEC_quick_push (tree, vec_tmp, new_tmp1);
3013 VEC_quick_push (tree, vec_tmp, new_tmp2);
3014 }
3015 else
3016 {
3017 /* Last step of the promotion sequence - store the results. */
3018 if (slp_node)
3019 {
3020 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
3021 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
3022 }
3023 else
3024 {
3025 if (!*prev_stmt_info)
3026 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
3027 else
3028 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
3029
3030 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
3031 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
3032 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
3033 }
3034 }
3035 }
3036
3037 if (multi_step_cvt)
3038 {
b8698a0f 3039 /* For a multi-step promotion operation we call the function
ff802fa1 3040 recursively for every stage. We start from the input type,
ebfd146a
IR
3041 create promotion operations to the intermediate types, and then
3042 create promotions to the output type. */
3043 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
ebfd146a
IR
3044 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
3045 multi_step_cvt - 1, stmt,
3046 vec_dsts, gsi, slp_node, code1,
3047 code2, decl1, decl2, op_type,
3048 prev_stmt_info);
3049 }
ff802fa1
IR
3050
3051 VEC_free (tree, heap, vec_tmp);
ebfd146a 3052}
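/* For example (a sketch, assuming 128-bit vector types): promoting V8HI
   operands to V4SI, every input vector yields two halves,

       vlo = VEC_UNPACK_LO_EXPR <vh>;
       vhi = VEC_UNPACK_HI_EXPR <vh>;

   and for a two-step promotion such as char -> int the recursive call
   unpacks each V8HI temporary again into two V4SI vectors.  */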
b8698a0f 3053
ebfd146a
IR
3054
3055/* Function vectorizable_type_promotion
3056
3057 Check if STMT performs a binary or unary operation that involves
3058 type promotion, and if it can be vectorized.
3059 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3060 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3061 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3062
3063static bool
3064vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
3065 gimple *vec_stmt, slp_tree slp_node)
3066{
3067 tree vec_dest;
3068 tree scalar_dest;
3069 tree op0, op1 = NULL;
3070 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3071 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3072 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3073 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3074 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
b8698a0f 3075 int op_type;
ebfd146a
IR
3076 tree def;
3077 gimple def_stmt;
3078 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3079 stmt_vec_info prev_stmt_info;
3080 int nunits_in;
3081 int nunits_out;
3082 tree vectype_out;
3083 int ncopies;
3084 int j, i;
3085 tree vectype_in;
3086 tree intermediate_type = NULL_TREE;
3087 int multi_step_cvt = 0;
3088 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3089 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
b8698a0f 3090
a70d6342
IR
3091 /* FORNOW: not supported by basic block SLP vectorization. */
3092 gcc_assert (loop_vinfo);
b8698a0f 3093
ebfd146a
IR
3094 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3095 return false;
3096
8644a673 3097 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
3098 return false;
3099
3100 /* Is STMT a vectorizable type-promotion operation? */
3101 if (!is_gimple_assign (stmt))
3102 return false;
3103
3104 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3105 return false;
3106
3107 code = gimple_assign_rhs_code (stmt);
3108 if (!CONVERT_EXPR_CODE_P (code)
3109 && code != WIDEN_MULT_EXPR)
3110 return false;
3111
b690cc0f
RG
3112 scalar_dest = gimple_assign_lhs (stmt);
3113 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3114
3115 /* Check the operands of the operation. */
ebfd146a 3116 op0 = gimple_assign_rhs1 (stmt);
b690cc0f
RG
3117 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3118 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3119 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3120 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3121 && CONVERT_EXPR_CODE_P (code))))
3122 return false;
3123 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
3124 &def_stmt, &def, &dt[0], &vectype_in))
3125 {
3126 if (vect_print_dump_info (REPORT_DETAILS))
3127 fprintf (vect_dump, "use not simple.");
3128 return false;
3129 }
3130 /* If op0 is an external or constant def use a vector type with
3131 the same size as the output vector type. */
3132 if (!vectype_in)
3133 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
7d8930a0
IR
3134 if (vec_stmt)
3135 gcc_assert (vectype_in);
ebfd146a 3136 if (!vectype_in)
7d8930a0
IR
3137 {
3138 if (vect_print_dump_info (REPORT_DETAILS))
3139 {
3140 fprintf (vect_dump, "no vectype for scalar type ");
3141 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3142 }
3143
3144 return false;
3145 }
ebfd146a 3146
b690cc0f 3147 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
ebfd146a
IR
3148 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3149 if (nunits_in <= nunits_out)
3150 return false;
3151
3152 /* Multiple types in SLP are handled by creating the appropriate number of
ff802fa1 3153 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
ebfd146a
IR
3154 case of SLP. */
3155 if (slp_node)
3156 ncopies = 1;
3157 else
3158 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3159
3160 gcc_assert (ncopies >= 1);
3161
ebfd146a
IR
3162 op_type = TREE_CODE_LENGTH (code);
3163 if (op_type == binary_op)
3164 {
3165 op1 = gimple_assign_rhs2 (stmt);
a70d6342 3166 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
ebfd146a
IR
3167 {
3168 if (vect_print_dump_info (REPORT_DETAILS))
3169 fprintf (vect_dump, "use not simple.");
3170 return false;
3171 }
3172 }
3173
3174 /* Supportable by target? */
b690cc0f 3175 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
ebfd146a
IR
3176 &decl1, &decl2, &code1, &code2,
3177 &multi_step_cvt, &interm_types))
3178 return false;
3179
3180 /* Binary widening operation can only be supported directly by the
3181 architecture. */
3182 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3183
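   /* For example (a sketch): the WIDEN_MULT_EXPR case covers loops like

          short b[N], c[N];
          int a[N];
          for (i = 0; i < N; i++)
            a[i] = b[i] * c[i];

      which a target can support via VEC_WIDEN_MULT_LO_EXPR and
      VEC_WIDEN_MULT_HI_EXPR; being binary, it must be doable in a
      single step, as the assertion above checks.  */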
ebfd146a
IR
3184 if (!vec_stmt) /* transformation not required. */
3185 {
3186 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3187 if (vect_print_dump_info (REPORT_DETAILS))
3188 fprintf (vect_dump, "=== vectorizable_promotion ===");
3189 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
3190 return true;
3191 }
3192
3193 /** Transform. **/
3194
3195 if (vect_print_dump_info (REPORT_DETAILS))
3196 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3197 ncopies);
3198
3199 /* Handle def. */
b8698a0f 3200 /* In case of multi-step promotion, we first generate promotion operations
ebfd146a 3201 to the intermediate types, and then from those types to the final one.
b8698a0f
L
3202 We store the vector destinations in VEC_DSTS in the correct order for
3203 recursive creation of promotion operations in
ebfd146a
IR
3204 vect_create_vectorized_promotion_stmts(). Vector destinations are created
3205 according to TYPES received from supportable_widening_operation(). */
3206 if (multi_step_cvt)
3207 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
3208 else
3209 vec_dsts = VEC_alloc (tree, heap, 1);
3210
3211 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3212 VEC_quick_push (tree, vec_dsts, vec_dest);
3213
3214 if (multi_step_cvt)
3215 {
3216 for (i = VEC_length (tree, interm_types) - 1;
3217 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
3218 {
3219 vec_dest = vect_create_destination_var (scalar_dest,
3220 intermediate_type);
3221 VEC_quick_push (tree, vec_dsts, vec_dest);
3222 }
3223 }
b8698a0f 3224
ebfd146a
IR
3225 if (!slp_node)
3226 {
b8698a0f 3227 vec_oprnds0 = VEC_alloc (tree, heap,
ebfd146a
IR
3228 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3229 if (op_type == binary_op)
3230 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3231 }
3232
3233 /* In case the vectorization factor (VF) is bigger than the number
3234 of elements that we can fit in a vectype (nunits), we have to generate
3235 more than one vector stmt - i.e - we need to "unroll" the
3236 vector stmt by a factor VF/nunits. */
3237
3238 prev_stmt_info = NULL;
3239 for (j = 0; j < ncopies; j++)
3240 {
3241 /* Handle uses. */
3242 if (j == 0)
3243 {
3244 if (slp_node)
9dc3f7de
IR
3245 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
3246 &vec_oprnds1, -1);
ebfd146a
IR
3247 else
3248 {
3249 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3250 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
3251 if (op_type == binary_op)
3252 {
3253 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3254 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3255 }
3256 }
3257 }
3258 else
3259 {
3260 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3261 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
3262 if (op_type == binary_op)
3263 {
3264 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3265 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
3266 }
3267 }
3268
9dc3f7de 3269 /* Arguments are ready. Create the new vector stmts. */
ebfd146a
IR
3270 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
3271 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
b8698a0f 3272 multi_step_cvt, stmt,
ebfd146a
IR
3273 tmp_vec_dsts,
3274 gsi, slp_node, code1, code2,
3275 decl1, decl2, op_type,
3276 &prev_stmt_info);
3277 }
3278
3279 VEC_free (tree, heap, vec_dsts);
3280 VEC_free (tree, heap, tmp_vec_dsts);
3281 VEC_free (tree, heap, interm_types);
3282 VEC_free (tree, heap, vec_oprnds0);
3283 VEC_free (tree, heap, vec_oprnds1);
3284
3285 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3286 return true;
3287}
3288
3289
3290/* Function vectorizable_store.
3291
3292 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3293 can be vectorized.
3294 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3295 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3296 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3297
3298static bool
3299vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3300 slp_tree slp_node)
3301{
3302 tree scalar_dest;
3303 tree data_ref;
3304 tree op;
3305 tree vec_oprnd = NULL_TREE;
3306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3307 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3308 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3309 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3310 struct loop *loop = NULL;
3311 enum machine_mode vec_mode;
3312 tree dummy;
3313 enum dr_alignment_support alignment_support_scheme;
3314 tree def;
3315 gimple def_stmt;
3316 enum vect_def_type dt;
3317 stmt_vec_info prev_stmt_info = NULL;
3318 tree dataref_ptr = NULL_TREE;
3319 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3320 int ncopies;
3321 int j;
3322 gimple next_stmt, first_stmt = NULL;
3323 bool strided_store = false;
3324 unsigned int group_size, i;
3325 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3326 bool inv_p;
3327 VEC(tree,heap) *vec_oprnds = NULL;
3328 bool slp = (slp_node != NULL);
3329 unsigned int vec_num;
3330 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3331
3332 if (loop_vinfo)
3333 loop = LOOP_VINFO_LOOP (loop_vinfo);
3334
3335 /* Multiple types in SLP are handled by creating the appropriate number of
3336 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3337 case of SLP. */
3338 if (slp)
3339 ncopies = 1;
3340 else
3341 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3342
3343 gcc_assert (ncopies >= 1);
3344
3345 /* FORNOW. This restriction should be relaxed. */
3346 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3347 {
3348 if (vect_print_dump_info (REPORT_DETAILS))
3349 fprintf (vect_dump, "multiple types in nested loop.");
3350 return false;
3351 }
3352
3353 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3354 return false;
3355
3356 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3357 return false;
3358
3359 /* Is vectorizable store? */
3360
3361 if (!is_gimple_assign (stmt))
3362 return false;
3363
3364 scalar_dest = gimple_assign_lhs (stmt);
3365 if (TREE_CODE (scalar_dest) != ARRAY_REF
3366 && TREE_CODE (scalar_dest) != INDIRECT_REF
3367 && TREE_CODE (scalar_dest) != COMPONENT_REF
3368 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3369 && TREE_CODE (scalar_dest) != REALPART_EXPR
3370 && TREE_CODE (scalar_dest) != MEM_REF)
3371 return false;
3372
3373 gcc_assert (gimple_assign_single_p (stmt));
3374 op = gimple_assign_rhs1 (stmt);
3375 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3376 {
3377 if (vect_print_dump_info (REPORT_DETAILS))
3378 fprintf (vect_dump, "use not simple.");
3379 return false;
3380 }
3381
3382 /* The scalar rhs type needs to be trivially convertible to the vector
3383 component type. This should always be the case. */
3384 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
3385 {
3386 if (vect_print_dump_info (REPORT_DETAILS))
3387 fprintf (vect_dump, "??? operands of different types");
3388 return false;
3389 }
3390
3391 vec_mode = TYPE_MODE (vectype);
3392 /* FORNOW. In some cases can vectorize even if data-type not supported
3393 (e.g. - array initialization with 0). */
3394 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3395 return false;
3396
3397 if (!STMT_VINFO_DATA_REF (stmt_info))
3398 return false;
3399
3400 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3401 {
3402 if (vect_print_dump_info (REPORT_DETAILS))
3403 fprintf (vect_dump, "negative step for store.");
3404 return false;
3405 }
3406
3407 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3408 {
3409 strided_store = true;
3410 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3411 if (!vect_strided_store_supported (vectype)
3412 && !PURE_SLP_STMT (stmt_info) && !slp)
3413 return false;
3414
3415 if (first_stmt == stmt)
3416 {
3417 /* STMT is the leader of the group. Check the operands of all the
3418 stmts of the group. */
3419 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
3420 while (next_stmt)
3421 {
3422 gcc_assert (gimple_assign_single_p (next_stmt));
3423 op = gimple_assign_rhs1 (next_stmt);
3424 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3425 &def, &dt))
3426 {
3427 if (vect_print_dump_info (REPORT_DETAILS))
3428 fprintf (vect_dump, "use not simple.");
3429 return false;
3430 }
3431 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3432 }
3433 }
3434 }
3435
3436 if (!vec_stmt) /* transformation not required. */
3437 {
3438 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3439 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
3440 return true;
3441 }
3442
3443 /** Transform. **/
3444
3445 if (strided_store)
3446 {
3447 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3448 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3449
3450 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3451
3452 /* FORNOW */
3453 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3454
3455 /* We vectorize all the stmts of the interleaving group when we
3456 reach the last stmt in the group. */
3457 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3458 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
3459 && !slp)
3460 {
3461 *vec_stmt = NULL;
3462 return true;
3463 }
3464
3465 if (slp)
3466 {
3467 strided_store = false;
3468 /* VEC_NUM is the number of vect stmts to be created for this
3469 group. */
3470 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3471 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3472 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3473 }
3474 else
3475 /* VEC_NUM is the number of vect stmts to be created for this
3476 group. */
3477 vec_num = group_size;
3478 }
3479 else
3480 {
3481 first_stmt = stmt;
3482 first_dr = dr;
3483 group_size = vec_num = 1;
3484 }
3485
3486 if (vect_print_dump_info (REPORT_DETAILS))
3487 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3488
3489 dr_chain = VEC_alloc (tree, heap, group_size);
3490 oprnds = VEC_alloc (tree, heap, group_size);
3491
3492 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3493 gcc_assert (alignment_support_scheme);
3494
3495 /* In case the vectorization factor (VF) is bigger than the number
3496 of elements that we can fit in a vectype (nunits), we have to generate
3497 more than one vector stmt - i.e - we need to "unroll" the
3498 vector stmt by a factor VF/nunits. For more details see documentation in
3499 vect_get_vec_def_for_copy_stmt. */
3500
3501 /* In case of interleaving (non-unit strided access):
3502
3503 S1: &base + 2 = x2
3504 S2: &base = x0
3505 S3: &base + 1 = x1
3506 S4: &base + 3 = x3
3507
3508 We create vectorized stores starting from the base address (the access of
3509 the first stmt in the chain (S2 in the above example)), when the last store stmt
3510 of the chain (S4) is reached:
3511
3512 VS1: &base = vx2
3513 VS2: &base + vec_size*1 = vx0
3514 VS3: &base + vec_size*2 = vx1
3515 VS4: &base + vec_size*3 = vx3
3516
3517 Then permutation statements are generated:
3518
3519 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3520 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3521 ...
3522
3523 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3524 (the order of the data-refs in the output of vect_permute_store_chain
3525 corresponds to the order of scalar stmts in the interleaving chain - see
3526 the documentation of vect_permute_store_chain()).
3527
3528 In case of both multiple types and interleaving, above vector stores and
3529 permutation stmts are created for every copy. The result vector stmts are
3530 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3531 STMT_VINFO_RELATED_STMT for the next copies.
3532 */
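 /* Illustrative source (not from the original comment): such a group of
 four interleaved stores typically originates from a scalar loop like

      for (i = 0; i < n; i++)
        {
          base[4*i]     = x0;
          base[4*i + 1] = x1;
          base[4*i + 2] = x2;
          base[4*i + 3] = x3;
        }

 where the four stores form one chain led by the first data-ref. */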
3533
3534 prev_stmt_info = NULL;
3535 for (j = 0; j < ncopies; j++)
3536 {
3537 gimple new_stmt;
3538 gimple ptr_incr;
3539
3540 if (j == 0)
3541 {
3542 if (slp)
3543 {
3544 /* Get vectorized arguments for SLP_NODE. */
3545 vect_get_slp_defs (NULL_TREE, NULL_TREE, slp_node, &vec_oprnds,
3546 NULL, -1);
3547
3548 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3549 }
3550 else
3551 {
3552 /* For interleaved stores we collect vectorized defs for all the
3553 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3554 used as an input to vect_permute_store_chain(), and OPRNDS as
3555 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3556
3557 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3558 OPRNDS are of size 1. */
3559 next_stmt = first_stmt;
3560 for (i = 0; i < group_size; i++)
3561 {
3562 /* Since gaps are not supported for interleaved stores,
3563 GROUP_SIZE is the exact number of stmts in the chain.
3564 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3565 there is no interleaving, GROUP_SIZE is 1, and only one
3566 iteration of the loop will be executed. */
3567 gcc_assert (next_stmt
3568 && gimple_assign_single_p (next_stmt));
3569 op = gimple_assign_rhs1 (next_stmt);
3570
3571 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3572 NULL);
3573 VEC_quick_push(tree, dr_chain, vec_oprnd);
3574 VEC_quick_push(tree, oprnds, vec_oprnd);
3575 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3576 }
3577 }
3578
3579 /* We should have caught mismatched types earlier. */
3580 gcc_assert (useless_type_conversion_p (vectype,
3581 TREE_TYPE (vec_oprnd)));
3582 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3583 &dummy, &ptr_incr, false,
3584 &inv_p);
3585 gcc_assert (bb_vinfo || !inv_p);
3586 }
3587 else
3588 {
3589 /* For interleaved stores we created vectorized defs for all the
3590 defs stored in OPRNDS in the previous iteration (previous copy).
3591 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3592 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3593 next copy.
3594 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3595 OPRNDS are of size 1. */
3596 for (i = 0; i < group_size; i++)
3597 {
3598 op = VEC_index (tree, oprnds, i);
3599 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3600 &dt);
3601 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3602 VEC_replace(tree, dr_chain, i, vec_oprnd);
3603 VEC_replace(tree, oprnds, i, vec_oprnd);
3604 }
3605 dataref_ptr =
3606 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3607 }
3608
3609 if (strided_store)
3610 {
3611 result_chain = VEC_alloc (tree, heap, group_size);
3612 /* Permute. */
3613 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3614 &result_chain))
3615 return false;
3616 }
3617
3618 next_stmt = first_stmt;
3619 for (i = 0; i < vec_num; i++)
3620 {
3621 struct ptr_info_def *pi;
3622
3623 if (i > 0)
3624 /* Bump the vector pointer. */
3625 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3626 NULL_TREE);
3627
3628 if (slp)
3629 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3630 else if (strided_store)
3631 /* For strided stores vectorized defs are interleaved in
3632 vect_permute_store_chain(). */
3633 vec_oprnd = VEC_index (tree, result_chain, i);
3634
3635 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3636 build_int_cst (reference_alias_ptr_type
3637 (DR_REF (first_dr)), 0));
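 /* The vector MEM_REF reuses the alias pointer type of the original
 scalar reference, so alias information is preserved; the access's
 alignment and misalignment are recorded on DATAREF_PTR's ptr_info
 below. */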
3638 pi = get_ptr_info (dataref_ptr);
3639 pi->align = TYPE_ALIGN_UNIT (vectype);
3640 if (aligned_access_p (first_dr))
3641 pi->misalign = 0;
3642 else if (DR_MISALIGNMENT (first_dr) == -1)
3643 {
3644 TREE_TYPE (data_ref)
3645 = build_aligned_type (TREE_TYPE (data_ref),
3646 TYPE_ALIGN (TREE_TYPE (vectype)));
3647 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
3648 pi->misalign = 0;
3649 }
3650 else
3651 {
3652 TREE_TYPE (data_ref)
3653 = build_aligned_type (TREE_TYPE (data_ref),
3654 TYPE_ALIGN (TREE_TYPE (vectype)));
3655 pi->misalign = DR_MISALIGNMENT (first_dr);
3656 }
3657
3658 /* Arguments are ready. Create the new vector stmt. */
3659 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3660 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3661 mark_symbols_for_renaming (new_stmt);
3662
3663 if (slp)
3664 continue;
3665
3666 if (j == 0)
3667 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3668 else
3669 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3670
3671 prev_stmt_info = vinfo_for_stmt (new_stmt);
3672 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3673 if (!next_stmt)
3674 break;
3675 }
3676 }
3677
3678 VEC_free (tree, heap, dr_chain);
3679 VEC_free (tree, heap, oprnds);
3680 if (result_chain)
3681 VEC_free (tree, heap, result_chain);
3682 if (vec_oprnds)
3683 VEC_free (tree, heap, vec_oprnds);
3684
3685 return true;
3686}
3687
3688/* Given a vector type VECTYPE returns a builtin DECL to be used
3689 for vector permutation and stores a mask into *MASK that implements
3690 reversal of the vector elements. If that is impossible to do
3691 returns NULL (and *MASK is unchanged). */
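 /* For a four-element vector, for instance, the mask built below is
 {3, 2, 1, 0}: element I of the permuted vector selects element
 NUNITS - 1 - I of the input, i.e. a full reversal. */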
3692
3693static tree
3694perm_mask_for_reverse (tree vectype, tree *mask)
3695{
3696 tree builtin_decl;
3697 tree mask_element_type, mask_type;
3698 tree mask_vec = NULL;
3699 int i;
3700 int nunits;
3701 if (!targetm.vectorize.builtin_vec_perm)
3702 return NULL;
3703
3704 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
3705 &mask_element_type);
3706 if (!builtin_decl || !mask_element_type)
3707 return NULL;
3708
3709 mask_type = get_vectype_for_scalar_type (mask_element_type);
3710 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3711 if (!mask_type
3712 || TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
3713 return NULL;
3714
3715 for (i = 0; i < nunits; i++)
3716 mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
3717 mask_vec = build_vector (mask_type, mask_vec);
3718
3719 if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
3720 return NULL;
3721 if (mask)
3722 *mask = mask_vec;
3723 return builtin_decl;
3724}
3725
3726/* Given a vector variable X, that was generated for the scalar LHS of
3727 STMT, generate instructions to reverse the vector elements of X,
3728 insert them at *GSI and return the permuted vector variable. */
3729
3730static tree
3731reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
3732{
3733 tree vectype = TREE_TYPE (x);
3734 tree mask_vec, builtin_decl;
3735 tree perm_dest, data_ref;
3736 gimple perm_stmt;
3737
3738 builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
3739
3740 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3741
3742 /* Generate the permute statement. */
3743 perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
3744 if (!useless_type_conversion_p (vectype,
3745 TREE_TYPE (TREE_TYPE (builtin_decl))))
3746 {
3747 tree tem = create_tmp_reg (TREE_TYPE (TREE_TYPE (builtin_decl)), NULL);
3748 tem = make_ssa_name (tem, perm_stmt);
3749 gimple_call_set_lhs (perm_stmt, tem);
3750 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3751 perm_stmt = gimple_build_assign (NULL_TREE,
3752 build1 (VIEW_CONVERT_EXPR,
3753 vectype, tem));
3754 }
3755 data_ref = make_ssa_name (perm_dest, perm_stmt);
3756 gimple_set_lhs (perm_stmt, data_ref);
3757 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3758
3759 return data_ref;
3760}
3761
3762/* vectorizable_load.
3763
3764 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3765 can be vectorized.
3766 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3767 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3768 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3769
3770static bool
3771vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3772 slp_tree slp_node, slp_instance slp_node_instance)
3773{
3774 tree scalar_dest;
3775 tree vec_dest = NULL;
3776 tree data_ref = NULL;
3777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3778 stmt_vec_info prev_stmt_info;
3779 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3780 struct loop *loop = NULL;
3781 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3782 bool nested_in_vect_loop = false;
3783 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3784 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3785 tree new_temp;
3786 enum machine_mode mode;
3787 gimple new_stmt = NULL;
3788 tree dummy;
3789 enum dr_alignment_support alignment_support_scheme;
3790 tree dataref_ptr = NULL_TREE;
3791 gimple ptr_incr;
3792 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3793 int ncopies;
3794 int i, j, group_size;
3795 tree msq = NULL_TREE, lsq;
3796 tree offset = NULL_TREE;
3797 tree realignment_token = NULL_TREE;
3798 gimple phi = NULL;
3799 VEC(tree,heap) *dr_chain = NULL;
3800 bool strided_load = false;
3801 gimple first_stmt;
3802 tree scalar_type;
3803 bool inv_p;
3804 bool negative;
3805 bool compute_in_loop = false;
3806 struct loop *at_loop;
3807 int vec_num;
3808 bool slp = (slp_node != NULL);
3809 bool slp_perm = false;
3810 enum tree_code code;
3811 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3812 int vf;
3813
3814 if (loop_vinfo)
3815 {
3816 loop = LOOP_VINFO_LOOP (loop_vinfo);
3817 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3818 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3819 }
3820 else
3821 vf = 1;
3822
3823 /* Multiple types in SLP are handled by creating the appropriate number of
3824 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3825 case of SLP. */
3826 if (slp)
3827 ncopies = 1;
3828 else
3829 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3830
3831 gcc_assert (ncopies >= 1);
3832
3833 /* FORNOW. This restriction should be relaxed. */
3834 if (nested_in_vect_loop && ncopies > 1)
3835 {
3836 if (vect_print_dump_info (REPORT_DETAILS))
3837 fprintf (vect_dump, "multiple types in nested loop.");
3838 return false;
3839 }
3840
3841 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3842 return false;
3843
3844 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3845 return false;
3846
3847 /* Is vectorizable load? */
3848 if (!is_gimple_assign (stmt))
3849 return false;
3850
3851 scalar_dest = gimple_assign_lhs (stmt);
3852 if (TREE_CODE (scalar_dest) != SSA_NAME)
3853 return false;
3854
3855 code = gimple_assign_rhs_code (stmt);
3856 if (code != ARRAY_REF
3857 && code != INDIRECT_REF
3858 && code != COMPONENT_REF
3859 && code != IMAGPART_EXPR
3860 && code != REALPART_EXPR
3861 && code != MEM_REF)
3862 return false;
3863
3864 if (!STMT_VINFO_DATA_REF (stmt_info))
3865 return false;
3866
3867 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
3868 if (negative && ncopies > 1)
3869 {
3870 if (vect_print_dump_info (REPORT_DETAILS))
3871 fprintf (vect_dump, "multiple types with negative step.");
3872 return false;
3873 }
3874
3875 scalar_type = TREE_TYPE (DR_REF (dr));
3876 mode = TYPE_MODE (vectype);
3877
3878 /* FORNOW. In some cases can vectorize even if data-type not supported
3879 (e.g. - data copies). */
3880 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
3881 {
3882 if (vect_print_dump_info (REPORT_DETAILS))
3883 fprintf (vect_dump, "Aligned load, but unsupported type.");
3884 return false;
3885 }
3886
3887 /* The vector component type needs to be trivially convertible to the
3888 scalar lhs. This should always be the case. */
3889 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3890 {
3891 if (vect_print_dump_info (REPORT_DETAILS))
3892 fprintf (vect_dump, "??? operands of different types");
3893 return false;
3894 }
3895
3896 /* Check if the load is a part of an interleaving chain. */
3897 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3898 {
3899 strided_load = true;
3900 /* FORNOW */
3901 gcc_assert (! nested_in_vect_loop);
3902
3903 /* Check if interleaving is supported. */
3904 if (!vect_strided_load_supported (vectype)
3905 && !PURE_SLP_STMT (stmt_info) && !slp)
3906 return false;
3907 }
3908
3909 if (negative)
3910 {
3911 gcc_assert (!strided_load);
3912 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
3913 if (alignment_support_scheme != dr_aligned
3914 && alignment_support_scheme != dr_unaligned_supported)
3915 {
3916 if (vect_print_dump_info (REPORT_DETAILS))
3917 fprintf (vect_dump, "negative step but alignment required.");
3918 return false;
3919 }
3920 if (!perm_mask_for_reverse (vectype, NULL))
3921 {
3922 if (vect_print_dump_info (REPORT_DETAILS))
3923 fprintf (vect_dump, "negative step and reversing not supported.");
3924 return false;
3925 }
3926 }
3927
3928 if (!vec_stmt) /* transformation not required. */
3929 {
3930 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3931 vect_model_load_cost (stmt_info, ncopies, NULL);
3932 return true;
3933 }
3934
3935 if (vect_print_dump_info (REPORT_DETAILS))
3936 fprintf (vect_dump, "transform load.");
3937
3938 /** Transform. **/
3939
3940 if (strided_load)
3941 {
3942 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3943 /* Check if the chain of loads is already vectorized. */
3944 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3945 {
3946 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3947 return true;
3948 }
3949 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3950 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3951
3952 /* VEC_NUM is the number of vect stmts to be created for this group. */
3953 if (slp)
3954 {
3955 strided_load = false;
3956 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3957 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3958 slp_perm = true;
3959 }
3960 else
3961 vec_num = group_size;
3962
3963 dr_chain = VEC_alloc (tree, heap, vec_num);
3964 }
3965 else
3966 {
3967 first_stmt = stmt;
3968 first_dr = dr;
3969 group_size = vec_num = 1;
3970 }
3971
3972 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3973 gcc_assert (alignment_support_scheme);
3974
3975 /* In case the vectorization factor (VF) is bigger than the number
3976 of elements that we can fit in a vectype (nunits), we have to generate
3977 more than one vector stmt - i.e - we need to "unroll" the
3978 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3979 from one copy of the vector stmt to the next, in the field
3980 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3981 stages to find the correct vector defs to be used when vectorizing
3982 stmts that use the defs of the current stmt. The example below
3983 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
3984 need to create 4 vectorized stmts):
3985
3986 before vectorization:
3987 RELATED_STMT VEC_STMT
3988 S1: x = memref - -
3989 S2: z = x + 1 - -
3990
3991 step 1: vectorize stmt S1:
3992 We first create the vector stmt VS1_0, and, as usual, record a
3993 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3994 Next, we create the vector stmt VS1_1, and record a pointer to
3995 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3996 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3997 stmts and pointers:
3998 RELATED_STMT VEC_STMT
3999 VS1_0: vx0 = memref0 VS1_1 -
4000 VS1_1: vx1 = memref1 VS1_2 -
4001 VS1_2: vx2 = memref2 VS1_3 -
4002 VS1_3: vx3 = memref3 - -
4003 S1: x = load - VS1_0
4004 S2: z = x + 1 - -
4005
4006 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4007 information we recorded in RELATED_STMT field is used to vectorize
4008 stmt S2. */
4009
4010 /* In case of interleaving (non-unit strided access):
4011
4012 S1: x2 = &base + 2
4013 S2: x0 = &base
4014 S3: x1 = &base + 1
4015 S4: x3 = &base + 3
4016
4017 Vectorized loads are created in the order of memory accesses
4018 starting from the access of the first stmt of the chain:
4019
4020 VS1: vx0 = &base
4021 VS2: vx1 = &base + vec_size*1
4022 VS3: vx3 = &base + vec_size*2
4023 VS4: vx4 = &base + vec_size*3
4024
4025 Then permutation statements are generated:
4026
4027 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4028 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4029 ...
4030
4031 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4032 (the order of the data-refs in the output of vect_permute_load_chain
4033 corresponds to the order of scalar stmts in the interleaving chain - see
4034 the documentation of vect_permute_load_chain()).
4035 The generation of permutation stmts and recording them in
4036 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4037
4038 In case of both multiple types and interleaving, the vector loads and
4039 permutation stmts above are created for every copy. The result vector
4040 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4041 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4042
4043 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4044 on a target that supports unaligned accesses (dr_unaligned_supported)
4045 we generate the following code:
4046 p = initial_addr;
4047 indx = 0;
4048 loop {
4049 p = p + indx * vectype_size;
4050 vec_dest = *(p);
4051 indx = indx + 1;
4052 }
4053
4054 Otherwise, the data reference is potentially unaligned on a target that
4055 does not support unaligned accesses (dr_explicit_realign_optimized) -
4056 then generate the following code, in which the data in each iteration is
4057 obtained by two vector loads, one from the previous iteration, and one
4058 from the current iteration:
4059 p1 = initial_addr;
4060 msq_init = *(floor(p1))
4061 p2 = initial_addr + VS - 1;
4062 realignment_token = call target_builtin;
4063 indx = 0;
4064 loop {
4065 p2 = p2 + indx * vectype_size
4066 lsq = *(floor(p2))
4067 vec_dest = realign_load (msq, lsq, realignment_token)
4068 indx = indx + 1;
4069 msq = lsq;
4070 } */
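 /* In the pseudo-code above, floor(p) denotes p rounded down to a
 vector-size boundary; it is materialized further below as
 p & -TYPE_ALIGN_UNIT (vectype). */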
4071
4072 /* If the misalignment remains the same throughout the execution of the
4073 loop, we can create the init_addr and permutation mask at the loop
4074 preheader. Otherwise, it needs to be created inside the loop.
4075 This can only occur when vectorizing memory accesses in the inner-loop
4076 nested within an outer-loop that is being vectorized. */
4077
4078 if (loop && nested_in_vect_loop_p (loop, stmt)
4079 && (TREE_INT_CST_LOW (DR_STEP (dr))
4080 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4081 {
4082 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4083 compute_in_loop = true;
4084 }
4085
4086 if ((alignment_support_scheme == dr_explicit_realign_optimized
4087 || alignment_support_scheme == dr_explicit_realign)
4088 && !compute_in_loop)
4089 {
4090 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4091 alignment_support_scheme, NULL_TREE,
4092 &at_loop);
4093 if (alignment_support_scheme == dr_explicit_realign_optimized)
4094 {
4095 phi = SSA_NAME_DEF_STMT (msq);
4096 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4097 }
4098 }
4099 else
4100 at_loop = loop;
4101
4102 if (negative)
4103 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
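 /* With a negative step the scalar accesses run backwards, so the start
 address is offset back by NUNITS-1 elements; each loaded vector then
 covers the required elements and is reversed afterwards by
 reverse_vec_elements. */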
4104
4105 prev_stmt_info = NULL;
4106 for (j = 0; j < ncopies; j++)
4107 {
4108 /* 1. Create the vector pointer update chain. */
4109 if (j == 0)
4110 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
4111 at_loop, offset,
4112 &dummy, &ptr_incr, false,
4113 &inv_p);
4114 else
4115 dataref_ptr =
4116 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
4117
4118 for (i = 0; i < vec_num; i++)
4119 {
4120 if (i > 0)
4121 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4122 NULL_TREE);
4123
4124 /* 2. Create the vector-load in the loop. */
4125 switch (alignment_support_scheme)
4126 {
4127 case dr_aligned:
4128 case dr_unaligned_supported:
4129 {
4130 struct ptr_info_def *pi;
4131 data_ref
4132 = build2 (MEM_REF, vectype, dataref_ptr,
4133 build_int_cst (reference_alias_ptr_type
4134 (DR_REF (first_dr)), 0));
4135 pi = get_ptr_info (dataref_ptr);
4136 pi->align = TYPE_ALIGN_UNIT (vectype);
4137 if (alignment_support_scheme == dr_aligned)
4138 {
4139 gcc_assert (aligned_access_p (first_dr));
4140 pi->misalign = 0;
4141 }
4142 else if (DR_MISALIGNMENT (first_dr) == -1)
4143 {
4144 TREE_TYPE (data_ref)
4145 = build_aligned_type (TREE_TYPE (data_ref),
4146 TYPE_ALIGN (TREE_TYPE (vectype)));
4147 pi->align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
4148 pi->misalign = 0;
4149 }
4150 else
4151 {
4152 TREE_TYPE (data_ref)
4153 = build_aligned_type (TREE_TYPE (data_ref),
4154 TYPE_ALIGN (TREE_TYPE (vectype)));
4155 pi->misalign = DR_MISALIGNMENT (first_dr);
4156 }
4157 break;
4158 }
4159 case dr_explicit_realign:
4160 {
4161 tree ptr, bump;
4162 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4163
4164 if (compute_in_loop)
4165 msq = vect_setup_realignment (first_stmt, gsi,
4166 &realignment_token,
4167 dr_explicit_realign,
4168 dataref_ptr, NULL);
4169
4170 new_stmt = gimple_build_assign_with_ops
4171 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4172 build_int_cst
4173 (TREE_TYPE (dataref_ptr),
4174 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4175 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4176 gimple_assign_set_lhs (new_stmt, ptr);
4177 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4178 data_ref
4179 = build2 (MEM_REF, vectype, ptr,
4180 build_int_cst (reference_alias_ptr_type
4181 (DR_REF (first_dr)), 0));
4182 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4183 new_stmt = gimple_build_assign (vec_dest, data_ref);
4184 new_temp = make_ssa_name (vec_dest, new_stmt);
4185 gimple_assign_set_lhs (new_stmt, new_temp);
4186 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4187 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189 msq = new_temp;
4190
4191 bump = size_binop (MULT_EXPR, vs_minus_1,
4192 TYPE_SIZE_UNIT (scalar_type));
4193 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4194 new_stmt = gimple_build_assign_with_ops
4195 (BIT_AND_EXPR, NULL_TREE, ptr,
4196 build_int_cst
4197 (TREE_TYPE (ptr),
4198 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4199 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4200 gimple_assign_set_lhs (new_stmt, ptr);
4201 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4202 data_ref
4203 = build2 (MEM_REF, vectype, ptr,
4204 build_int_cst (reference_alias_ptr_type
4205 (DR_REF (first_dr)), 0));
4206 break;
4207 }
4208 case dr_explicit_realign_optimized:
4209 new_stmt = gimple_build_assign_with_ops
4210 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4211 build_int_cst
4212 (TREE_TYPE (dataref_ptr),
4213 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4214 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4215 gimple_assign_set_lhs (new_stmt, new_temp);
4216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4217 data_ref
4218 = build2 (MEM_REF, vectype, new_temp,
4219 build_int_cst (reference_alias_ptr_type
4220 (DR_REF (first_dr)), 0));
4221 break;
4222 default:
4223 gcc_unreachable ();
4224 }
4225 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4226 new_stmt = gimple_build_assign (vec_dest, data_ref);
4227 new_temp = make_ssa_name (vec_dest, new_stmt);
4228 gimple_assign_set_lhs (new_stmt, new_temp);
4229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4230 mark_symbols_for_renaming (new_stmt);
4231
4232 /* 3. Handle explicit realignment if necessary/supported. Create in
4233 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
4234 if (alignment_support_scheme == dr_explicit_realign_optimized
4235 || alignment_support_scheme == dr_explicit_realign)
4236 {
4237 tree tmp;
4238
4239 lsq = gimple_assign_lhs (new_stmt);
4240 if (!realignment_token)
4241 realignment_token = dataref_ptr;
4242 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4243 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
4244 realignment_token);
4245 new_stmt = gimple_build_assign (vec_dest, tmp);
4246 new_temp = make_ssa_name (vec_dest, new_stmt);
4247 gimple_assign_set_lhs (new_stmt, new_temp);
4248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4249
4250 if (alignment_support_scheme == dr_explicit_realign_optimized)
4251 {
4252 gcc_assert (phi);
4253 if (i == vec_num - 1 && j == ncopies - 1)
4254 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
4255 UNKNOWN_LOCATION);
4256 msq = lsq;
4257 }
4258 }
4259
4260 /* 4. Handle invariant-load. */
4261 if (inv_p && !bb_vinfo)
4262 {
4263 gcc_assert (!strided_load);
4264 gcc_assert (nested_in_vect_loop_p (loop, stmt));
4265 if (j == 0)
4266 {
4267 int k;
4268 tree t = NULL_TREE;
4269 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
4270
4271 /* CHECKME: bitpos depends on endianness? */
4272 bitpos = bitsize_zero_node;
4273 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
4274 bitsize, bitpos);
4275 vec_dest =
4276 vect_create_destination_var (scalar_dest, NULL_TREE);
4277 new_stmt = gimple_build_assign (vec_dest, vec_inv);
4278 new_temp = make_ssa_name (vec_dest, new_stmt);
4279 gimple_assign_set_lhs (new_stmt, new_temp);
4280 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4281
4282 for (k = nunits - 1; k >= 0; --k)
4283 t = tree_cons (NULL_TREE, new_temp, t);
4284 /* FIXME: use build_constructor directly. */
4285 vec_inv = build_constructor_from_list (vectype, t);
4286 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4287 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4288 }
4289 else
4290 gcc_unreachable (); /* FORNOW. */
4291 }
4292
4293 if (negative)
4294 {
4295 new_temp = reverse_vec_elements (new_temp, stmt, gsi);
4296 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4297 }
4298
4299 /* Collect vector loads and later create their permutation in
4300 vect_transform_strided_load (). */
4301 if (strided_load || slp_perm)
4302 VEC_quick_push (tree, dr_chain, new_temp);
4303
4304 /* Store vector loads in the corresponding SLP_NODE. */
4305 if (slp && !slp_perm)
4306 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4307 }
4308
4309 if (slp && !slp_perm)
4310 continue;
4311
4312 if (slp_perm)
4313 {
4314 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4315 slp_node_instance, false))
4316 {
4317 VEC_free (tree, heap, dr_chain);
4318 return false;
4319 }
4320 }
4321 else
4322 {
4323 if (strided_load)
4324 {
4325 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
4326 return false;
4327
4328 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4329 VEC_free (tree, heap, dr_chain);
4330 dr_chain = VEC_alloc (tree, heap, group_size);
4331 }
4332 else
4333 {
4334 if (j == 0)
4335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4336 else
4337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4338 prev_stmt_info = vinfo_for_stmt (new_stmt);
4339 }
4340 }
4341 }
4342
4343 if (dr_chain)
4344 VEC_free (tree, heap, dr_chain);
4345
4346 return true;
4347}
4348
4349/* Function vect_is_simple_cond.
4350
4351 Input:
4352 LOOP - the loop that is being vectorized.
4353 COND - Condition that is checked for simple use.
4354
4355 Returns whether a COND can be vectorized. Checks whether
4356 condition operands are supportable using vect_is_simple_use. */
4357
4358static bool
4359vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4360{
4361 tree lhs, rhs;
4362 tree def;
4363 enum vect_def_type dt;
4364
4365 if (!COMPARISON_CLASS_P (cond))
4366 return false;
4367
4368 lhs = TREE_OPERAND (cond, 0);
4369 rhs = TREE_OPERAND (cond, 1);
4370
4371 if (TREE_CODE (lhs) == SSA_NAME)
4372 {
4373 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4374 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
4375 &dt))
4376 return false;
4377 }
4378 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4379 && TREE_CODE (lhs) != FIXED_CST)
4380 return false;
4381
4382 if (TREE_CODE (rhs) == SSA_NAME)
4383 {
4384 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
b8698a0f 4385 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
a70d6342 4386 &dt))
ebfd146a
IR
4387 return false;
4388 }
4389 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4390 && TREE_CODE (rhs) != FIXED_CST)
4391 return false;
4392
4393 return true;
4394}
4395
4396/* vectorizable_condition.
4397
4398 Check if STMT is conditional modify expression that can be vectorized.
4399 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4400 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4401 at GSI.
4402
4403 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
4404 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4405 the else clause if it is 2).
4406
4407 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
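 /* For example, the scalar statement
        x = a < b ? c : d;
    is transformed below into
        vx = VEC_COND_EXPR <va < vb, vc, vd>;  */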
4408
4409 bool
4410 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4411 gimple *vec_stmt, tree reduc_def, int reduc_index)
4412{
4413 tree scalar_dest = NULL_TREE;
4414 tree vec_dest = NULL_TREE;
4415 tree op = NULL_TREE;
4416 tree cond_expr, then_clause, else_clause;
4417 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4418 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4419 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4420 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4421 tree vec_compare, vec_cond_expr;
4422 tree new_temp;
4423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4424 enum machine_mode vec_mode;
4425 tree def;
a855b1b1 4426 enum vect_def_type dt, dts[4];
4427 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4428 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4429 enum tree_code code;
4430 stmt_vec_info prev_stmt_info = NULL;
4431 int j;
4432
4433 /* FORNOW: unsupported in basic block SLP. */
4434 gcc_assert (loop_vinfo);
4435
4436 gcc_assert (ncopies >= 1);
4437 if (reduc_index && ncopies > 1)
4438 return false; /* FORNOW */
4439
4440 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4441 return false;
4442
4443 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4444 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
4445 && reduc_def))
4446 return false;
4447
4448 /* FORNOW: SLP not supported. */
4449 if (STMT_SLP_TYPE (stmt_info))
4450 return false;
4451
4452 /* FORNOW: not yet supported. */
4453 if (STMT_VINFO_LIVE_P (stmt_info))
4454 {
4455 if (vect_print_dump_info (REPORT_DETAILS))
4456 fprintf (vect_dump, "value used after loop.");
4457 return false;
4458 }
4459
4460 /* Is vectorizable conditional operation? */
4461 if (!is_gimple_assign (stmt))
4462 return false;
4463
4464 code = gimple_assign_rhs_code (stmt);
4465
4466 if (code != COND_EXPR)
4467 return false;
4468
4469 gcc_assert (gimple_assign_single_p (stmt));
4470 op = gimple_assign_rhs1 (stmt);
4471 cond_expr = TREE_OPERAND (op, 0);
4472 then_clause = TREE_OPERAND (op, 1);
4473 else_clause = TREE_OPERAND (op, 2);
4474
4475 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4476 return false;
4477
4478 /* We do not handle two different vector types for the condition
4479 and the values. */
4480 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
4481 TREE_TYPE (vectype)))
4482 return false;
4483
4484 if (TREE_CODE (then_clause) == SSA_NAME)
4485 {
4486 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4487 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
4488 &then_def_stmt, &def, &dt))
4489 return false;
4490 }
4491 else if (TREE_CODE (then_clause) != INTEGER_CST
4492 && TREE_CODE (then_clause) != REAL_CST
4493 && TREE_CODE (then_clause) != FIXED_CST)
4494 return false;
4495
4496 if (TREE_CODE (else_clause) == SSA_NAME)
4497 {
4498 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4499 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
4500 &else_def_stmt, &def, &dt))
4501 return false;
4502 }
4503 else if (TREE_CODE (else_clause) != INTEGER_CST
4504 && TREE_CODE (else_clause) != REAL_CST
4505 && TREE_CODE (else_clause) != FIXED_CST)
4506 return false;
4507
4508
4509 vec_mode = TYPE_MODE (vectype);
4510
4511 if (!vec_stmt)
4512 {
4513 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4514 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
4515 }
4516
4517 /* Transform */
4518
4519 /* Handle def. */
4520 scalar_dest = gimple_assign_lhs (stmt);
4521 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4522
4523 /* Handle cond expr. */
4524 for (j = 0; j < ncopies; j++)
4525 {
4526 gimple new_stmt;
4527 if (j == 0)
4528 {
4529 gimple gtemp;
4530 vec_cond_lhs =
4531 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
4532 stmt, NULL);
4533 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
4534 NULL, &gtemp, &def, &dts[0]);
4535 vec_cond_rhs =
4536 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
4537 stmt, NULL);
4538 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
4539 NULL, &gtemp, &def, &dts[1]);
4540 if (reduc_index == 1)
4541 vec_then_clause = reduc_def;
4542 else
4543 {
4544 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
4545 stmt, NULL);
4546 vect_is_simple_use (then_clause, loop_vinfo,
4547 NULL, &gtemp, &def, &dts[2]);
4548 }
4549 if (reduc_index == 2)
4550 vec_else_clause = reduc_def;
4551 else
4552 {
4553 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
4554 stmt, NULL);
4555 vect_is_simple_use (else_clause, loop_vinfo,
4556 NULL, &gtemp, &def, &dts[3]);
4557 }
4558 }
4559 else
4560 {
4561 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0], vec_cond_lhs);
4562 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1], vec_cond_rhs);
4563 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
4564 vec_then_clause);
4565 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
4566 vec_else_clause);
4567 }
4568
4569 /* Arguments are ready. Create the new vector stmt. */
4570 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4571 vec_cond_lhs, vec_cond_rhs);
4572 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4573 vec_compare, vec_then_clause, vec_else_clause);
4574
4575 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4576 new_temp = make_ssa_name (vec_dest, new_stmt);
4577 gimple_assign_set_lhs (new_stmt, new_temp);
4578 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4579 if (j == 0)
4580 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4581 else
4582 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4583
4584 prev_stmt_info = vinfo_for_stmt (new_stmt);
4585 }
4586
4587 return true;
4588}
4589
4590
4591 /* Make sure the statement is vectorizable. */
4592
4593bool
4594 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4595 {
4596 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4597 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4598 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4599 bool ok;
4600 tree scalar_type, vectype;
4601
4602 if (vect_print_dump_info (REPORT_DETAILS))
4603 {
4604 fprintf (vect_dump, "==> examining statement: ");
4605 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4606 }
4607
4608 if (gimple_has_volatile_ops (stmt))
4609 {
4610 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4611 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4612
4613 return false;
4614 }
4615
4616 /* Skip stmts that do not need to be vectorized. In loops this is expected
4617 to include:
4618 - the COND_EXPR which is the loop exit condition
4619 - any LABEL_EXPRs in the loop
4620 - computations that are used only for array indexing or loop control.
4621 In basic blocks we only analyze statements that are a part of some SLP
4622 instance, therefore, all the statements are relevant. */
4623
4624 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4625 && !STMT_VINFO_LIVE_P (stmt_info))
4626 {
4627 if (vect_print_dump_info (REPORT_DETAILS))
4628 fprintf (vect_dump, "irrelevant.");
4629
4630 return true;
4631 }
4632
4633 switch (STMT_VINFO_DEF_TYPE (stmt_info))
4634 {
4635 case vect_internal_def:
4636 break;
4637
4638 case vect_reduction_def:
4639 case vect_nested_cycle:
4640 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
4641 || relevance == vect_used_in_outer_by_reduction
4642 || relevance == vect_unused_in_scope));
4643 break;
4644
4645 case vect_induction_def:
4646 case vect_constant_def:
4647 case vect_external_def:
4648 case vect_unknown_def_type:
4649 default:
4650 gcc_unreachable ();
4651 }
4652
4653 if (bb_vinfo)
4654 {
4655 gcc_assert (PURE_SLP_STMT (stmt_info));
4656
4657 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
4658 if (vect_print_dump_info (REPORT_DETAILS))
4659 {
4660 fprintf (vect_dump, "get vectype for scalar type: ");
4661 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4662 }
4663
4664 vectype = get_vectype_for_scalar_type (scalar_type);
4665 if (!vectype)
4666 {
4667 if (vect_print_dump_info (REPORT_DETAILS))
4668 {
4669 fprintf (vect_dump, "not SLPed: unsupported data-type ");
4670 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4671 }
4672 return false;
4673 }
4674
4675 if (vect_print_dump_info (REPORT_DETAILS))
4676 {
4677 fprintf (vect_dump, "vectype: ");
4678 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4679 }
4680
4681 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4682 }
4683
4684 if (STMT_VINFO_RELEVANT_P (stmt_info))
4685 {
4686 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
4687 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4688 *need_to_vectorize = true;
4689 }
4690
4691 ok = true;
4692 if (!bb_vinfo
4693 && (STMT_VINFO_RELEVANT_P (stmt_info)
4694 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4695 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4696 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4697 || vectorizable_conversion (stmt, NULL, NULL, NULL)
4698 || vectorizable_shift (stmt, NULL, NULL, NULL)
4699 || vectorizable_operation (stmt, NULL, NULL, NULL)
4700 || vectorizable_assignment (stmt, NULL, NULL, NULL)
4701 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4702 || vectorizable_call (stmt, NULL, NULL)
4703 || vectorizable_store (stmt, NULL, NULL, NULL)
4704 || vectorizable_reduction (stmt, NULL, NULL, NULL)
4705 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4706 else
4707 {
4708 if (bb_vinfo)
4709 ok = (vectorizable_shift (stmt, NULL, NULL, node)
4710 || vectorizable_operation (stmt, NULL, NULL, node)
4711 || vectorizable_assignment (stmt, NULL, NULL, node)
4712 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4713 || vectorizable_store (stmt, NULL, NULL, node));
4714 }
4715
4716 if (!ok)
4717 {
4718 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4719 {
4720 fprintf (vect_dump, "not vectorized: relevant stmt not ");
4721 fprintf (vect_dump, "supported: ");
4722 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4723 }
4724
4725 return false;
4726 }
4727
4728 if (bb_vinfo)
4729 return true;
4730
4731 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
4732 need extra handling, except for vectorizable reductions. */
4733 if (STMT_VINFO_LIVE_P (stmt_info)
4734 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4735 ok = vectorizable_live_operation (stmt, NULL, NULL);
4736
4737 if (!ok)
4738 {
4739 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4740 {
4741 fprintf (vect_dump, "not vectorized: live stmt not ");
4742 fprintf (vect_dump, "supported: ");
4743 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4744 }
4745
4746 return false;
4747 }
4748
4749 if (!PURE_SLP_STMT (stmt_info))
4750 {
4751 /* Groups of strided accesses whose size is not a power of 2 are not
4752 vectorizable yet using loop-vectorization. Therefore, if this stmt
4753 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4754 loop-based vectorized), the loop cannot be vectorized. */
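 /* E.g. an interleaving group of size 3 has exact_log2 (3) == -1 and is
 rejected below unless the stmt is pure SLP. */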
4755 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4756 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4757 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4758 {
4759 if (vect_print_dump_info (REPORT_DETAILS))
4760 {
4761 fprintf (vect_dump, "not vectorized: the size of group "
4762 "of strided accesses is not a power of 2");
4763 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4764 }
4765
4766 return false;
4767 }
4768 }
4769
4770 return true;
4771}
4772
4773
4774/* Function vect_transform_stmt.
4775
4776 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4777
4778bool
4779vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4780 bool *strided_store, slp_tree slp_node,
4781 slp_instance slp_node_instance)
4782{
4783 bool is_store = false;
4784 gimple vec_stmt = NULL;
4785 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4786 gimple orig_stmt_in_pattern, orig_scalar_stmt = stmt;
4787 bool done;
4788
4789 switch (STMT_VINFO_TYPE (stmt_info))
4790 {
4791 case type_demotion_vec_info_type:
4792 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4793 gcc_assert (done);
4794 break;
4795
4796 case type_promotion_vec_info_type:
4797 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4798 gcc_assert (done);
4799 break;
4800
4801 case type_conversion_vec_info_type:
4802 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4803 gcc_assert (done);
4804 break;
4805
4806 case induc_vec_info_type:
4807 gcc_assert (!slp_node);
4808 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4809 gcc_assert (done);
4810 break;
4811
4812 case shift_vec_info_type:
4813 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
4814 gcc_assert (done);
4815 break;
4816
4817 case op_vec_info_type:
4818 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4819 gcc_assert (done);
4820 break;
4821
4822 case assignment_vec_info_type:
4823 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4824 gcc_assert (done);
4825 break;
4826
4827 case load_vec_info_type:
4828 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4829 slp_node_instance);
4830 gcc_assert (done);
4831 break;
4832
4833 case store_vec_info_type:
4834 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4835 gcc_assert (done);
4836 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4837 {
4838 /* In case of interleaving, the whole chain is vectorized when the
4839 last store in the chain is reached. Store stmts before the last
4840 one are skipped, and their vec_stmt_info shouldn't be freed
4841 meanwhile. */
4842 *strided_store = true;
4843 if (STMT_VINFO_VEC_STMT (stmt_info))
4844 is_store = true;
4845 }
4846 else
4847 is_store = true;
4848 break;
4849
4850 case condition_vec_info_type:
4851 gcc_assert (!slp_node);
4852 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
4853 gcc_assert (done);
4854 break;
4855
4856 case call_vec_info_type:
4857 gcc_assert (!slp_node);
4858 done = vectorizable_call (stmt, gsi, &vec_stmt);
4859 stmt = gsi_stmt (*gsi);
4860 break;
4861
4862 case reduc_vec_info_type:
4863 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
4864 gcc_assert (done);
4865 break;
4866
4867 default:
4868 if (!STMT_VINFO_LIVE_P (stmt_info))
4869 {
4870 if (vect_print_dump_info (REPORT_DETAILS))
4871 fprintf (vect_dump, "stmt not supported.");
4872 gcc_unreachable ();
4873 }
4874 }
4875
4876 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4877 is being vectorized, but outside the immediately enclosing loop. */
4878 if (vec_stmt
4879 && STMT_VINFO_LOOP_VINFO (stmt_info)
4880 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4881 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4882 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4883 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4884 || STMT_VINFO_RELEVANT (stmt_info) ==
4885 vect_used_in_outer_by_reduction))
4886 {
4887 struct loop *innerloop = LOOP_VINFO_LOOP (
4888 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4889 imm_use_iterator imm_iter;
4890 use_operand_p use_p;
4891 tree scalar_dest;
4892 gimple exit_phi;
4893
4894 if (vect_print_dump_info (REPORT_DETAILS))
4895 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4896
4897 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4898 (to be used when vectorizing outer-loop stmts that use the DEF of
4899 STMT). */
4900 if (gimple_code (stmt) == GIMPLE_PHI)
4901 scalar_dest = PHI_RESULT (stmt);
4902 else
4903 scalar_dest = gimple_assign_lhs (stmt);
4904
4905 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4906 {
4907 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4908 {
4909 exit_phi = USE_STMT (use_p);
4910 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4911 }
4912 }
4913 }
4914
4915 /* Handle stmts whose DEF is used outside the loop-nest that is
4916 being vectorized. */
4917 if (STMT_VINFO_LIVE_P (stmt_info)
4918 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4919 {
4920 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4921 gcc_assert (done);
4922 }
4923
4924 if (vec_stmt)
4925 {
4926 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4927 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4928 if (orig_stmt_in_pattern)
4929 {
4930 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4931 /* STMT was inserted by the vectorizer to replace a computation idiom.
b8698a0f
L
4932 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4933 computed this idiom. We need to record a pointer to VEC_STMT in
4934 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
ebfd146a
IR
4935 documentation of vect_pattern_recog. */
4936 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4937 {
039d9ea1
IR
4938 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo)
4939 == orig_scalar_stmt);
ebfd146a
IR
4940 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4941 }
4942 }
4943 }
4944
b8698a0f 4945 return is_store;
ebfd146a
IR
4946}


/* Remove a group of stores (for SLP or interleaving), and free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}
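
/* Usage sketch (hypothetical caller; the identifiers below stand for a
   caller's locals, not the actual call sites): once vect_transform_stmt
   signals IS_STORE for the last scalar store of an interleaving chain,
   the whole scalar chain can be removed starting from its first member:

     if (is_store)
       {
         stmt_vec_info si = vinfo_for_stmt (stmt);
         gimple first = DR_GROUP_FIRST_DR (si) ? DR_GROUP_FIRST_DR (si) : stmt;
         vect_remove_stores (first);
       }

   Note that the loop above must read DR_GROUP_NEXT_DR before freeing the
   stmt_vec_info, which is why it caches the successor in TMP.  */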


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  DR_GROUP_FIRST_DR (res) = NULL;
  DR_GROUP_NEXT_DR (res) = NULL;
  DR_GROUP_SIZE (res) = 0;
  DR_GROUP_STORE_COUNT (res) = 0;
  DR_GROUP_GAP (res) = 0;
  DR_GROUP_SAME_DR_STMT (res) = NULL;
  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}
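
/* A minimal sketch of how a stmt_vec_info is typically created and
   attached (the analysis code does this when initializing a loop or
   basic-block vec-info; SI and LOOP_VINFO here are assumed caller
   locals):

     gimple stmt = gsi_stmt (si);
     set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, NULL));

   Afterwards vinfo_for_stmt (stmt) returns the freshly allocated,
   zero-initialized stmt_vec_info.  */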


/* Create a vector for stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free the vector of stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* If we'd build a vector type of elements whose mode precision doesn't
     match their type's precision we'll get mismatched types on vector
     extracts via BIT_FIELD_REFs.  This effectively means we disable
     vectorization of bool and/or enum types in some languages.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
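
/* Worked example (a sketch; the resulting mode depends on the target):
   for SCALAR_TYPE "int" (SImode, nbytes == 4) and SIZE == 16,
   mode_for_vector looks up a 4-element SImode vector mode, so
   nunits == 16 / 4 == 4 and the function builds "vector(4) int"
   (V4SImode where the target supports it).  With SIZE == 0 the element
   count instead comes from targetm.vectorize.preferred_simd_mode.  */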

unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
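
/* Note the latching behavior: the first successful call fixes
   current_vector_size, and every later call looks up vectors of that
   same byte size.  A sketch (assuming a target whose preferred SImode
   vector is 16 bytes wide):

     tree v4si = get_vectype_for_scalar_type (intSI_type_node);
     tree v8hi = get_vectype_for_scalar_type (intHI_type_node);

   Here the first call sets current_vector_size to 16, so the second
   call yields an 8-element HImode vector rather than whatever the
   target would prefer for HImode in isolation.  */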

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

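/* For example (a sketch, subject to target support): if VECTOR_TYPE is
   the 16-byte "vector(4) float", then

     get_same_sized_vectype (intSI_type_node, vector_type)

   looks up a 16-byte SImode vector and yields "vector(4) int".  This is
   what the conversion code uses to pair input and output vectypes of
   equal size.  */
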
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and
   operands that are defined by the current iteration of the loop.
   Unsupportable operands are those that are defined by a previous
   iteration of the loop (as is the case in reduction/induction
   computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* An empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
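
/* Usage sketch (hypothetical; OP and LOOP_VINFO stand for a caller's
   locals): a vectorizable_* routine typically classifies each operand
   like so:

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
       return false;

   and then dispatches on DT (vect_constant_def, vect_external_def,
   vect_internal_def, ...) when deciding how to create the vector
   operands.  */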

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
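
/* Usage sketch (hypothetical caller locals): the _1 variant saves the
   caller a vinfo lookup when the operand has an internal def, and the
   caller supplies the fallback for constant/external defs:

     tree vectype;
     if (!vect_is_simple_use_1 (op, loop_vinfo, NULL, &def_stmt, &def,
                                &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

   The fallback lookup mirrors what the comment above prescribes.  */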


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
     when vectorizing the operation, if available.  In this case,
     CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;

  /* The result of a vectorized widening operation usually requires two
     vectors (because the widened results do not fit in one vector).  The
     generated vector results would normally be expected to be generated
     in the same order as in the original scalar computation, i.e. if 8
     results are generated in each vector iteration, they are to be
     organized as follows:
        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation
     is used in a reduction computation only, the order doesn't matter
     (because when vectorizing a reduction we change the order of the
     computation).  Some targets can take advantage of this and generate
     more efficient code.  For example, targets like Altivec, that support
     widen_mult using a sequence of {mult_even,mult_odd} generate the
     following vectors:
        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow changing the order
     of the computation in the inner-loop during outer-loop
     vectorization.  */

  if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_MULT_HI_EXPR;
          c2 = VEC_WIDEN_MULT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_MULT_HI_EXPR;
          c1 = VEC_WIDEN_MULT_LO_EXPR;
        }
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_HI_EXPR;
          c2 = VEC_UNPACK_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_HI_EXPR;
          c1 = VEC_UNPACK_LO_EXPR;
        }
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_FLOAT_HI_EXPR;
          c2 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_FLOAT_HI_EXPR;
          c1 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      int i;
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
        return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than
         MAX_INTERM_CVT_STEPS intermediate steps in the promotion
         sequence.  We try MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE,
         and fail if we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type
            = lang_hooks.types.type_for_mode (intermediate_mode,
                                              TYPE_UNSIGNED (prev_type));
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

          if (!optab3 || !optab4
              || ((icode1 = optab_handler (optab1, prev_mode))
                  == CODE_FOR_nothing)
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || ((icode2 = optab_handler (optab2, prev_mode))
                  == CODE_FOR_nothing)
              || insn_data[icode2].operand[0].mode != intermediate_mode
              || ((icode1 = optab_handler (optab3, intermediate_mode))
                  == CODE_FOR_nothing)
              || ((icode2 = optab_handler (optab4, intermediate_mode))
                  == CODE_FOR_nothing))
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
              && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}

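/* Worked example (a sketch; modes assume a generic 16-byte vector
   target): widening chars to ints, VECTYPE_IN is "vector(16) char" and
   VECTYPE_OUT "vector(4) int".  A single unpack hi/lo pair only reaches
   "vector(8) short", so the loop above records that short vectype in
   *INTERM_TYPES, bumps *MULTI_STEP_CVT to 1 (assuming the caller
   zero-initialized it), and then checks that a second hi/lo pair
   reaches the int vectype - the char->short->int case from the
   function comment.  */
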
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  int i;

  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
    {
      enum machine_mode intermediate_mode, prev_mode = vec_mode;

      *code1 = c1;
      prev_type = vectype;
      /* We assume here that there will not be more than
         MAX_INTERM_CVT_STEPS intermediate steps in the narrowing
         sequence.  We try MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE,
         and fail if we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type
            = lang_hooks.types.type_for_mode (intermediate_mode,
                                              TYPE_UNSIGNED (prev_type));
          interm_optab = optab_for_tree_code (c1, intermediate_type,
                                              optab_default);
          if (!interm_optab
              || ((icode1 = optab_handler (optab1, prev_mode))
                  == CODE_FOR_nothing)
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || ((icode1 = optab_handler (interm_optab, intermediate_mode))
                  == CODE_FOR_nothing))
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  return true;
}
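
/* Worked example (a sketch, mirroring the widening case): narrowing
   ints to chars on a 16-byte vector target, a single VEC_PACK_TRUNC_EXPR
   on two SImode vectors only yields a HImode vector ("vector(8) short"),
   not the QImode NARROW_VECTYPE.  The loop above therefore records the
   short vectype in *INTERM_TYPES, bumps *MULTI_STEP_CVT to 1, and
   succeeds when a second pack reaches the char vectype - the
   int->short->char case from the function comment.  */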