/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "toplev.h"
#include "tree-vectorizer.h"
#include "langhooks.h"


/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}


/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the loop exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

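/* For instance (an illustrative sketch, not code taken from the
   surrounding sources):

     s = 0;
     for (i = 0; i < N; i++)
       {
         a[i] = b[i] + 1;    <-- relevant: has a vdef (alters memory)
         s = s + b[i];       <-- live: 's' is used after the loop
       }
     ... = s;
*/
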
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

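/* For example (sketch): in the store  a[i] = x  the use of 'x' is a
   real operand of the (scalar and vector) store, whereas the use of
   'i' only serves the address computation of 'a[i]' and therefore
   needs no vector def of its own.  */
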
static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}


/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
     STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.
     Uses that are used for address computation are not considered
     relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }

  /* 2. Process_worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines
         it (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore
         the order of the results that they produce does not have to be
         kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of nested cycle.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of double reduction.");

              VEC_free (gimple, heap, worklist);
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}


/* Return the cost of TYPE_OF_COST by calling the target's
   vectorization cost hook.  */

static inline
int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
{
  tree dummy_type = NULL;
  int dummy = 0;

  return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
                                                       dummy_type, dummy);
}

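/* For instance, the cost of one vector load is obtained as

     int cost = vect_get_stmt_cost (vector_load);

   which is typically 1 under the default cost model; targets override
   the hook to reflect their own latencies (illustrative note; the
   actual value is target-dependent).  */
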
int
cost_for_stmt (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case load_vec_info_type:
      return vect_get_stmt_cost (scalar_load);
    case store_vec_info_type:
      return vect_get_stmt_cost (scalar_store);
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      return vect_get_stmt_cost (scalar_stmt);
    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of
   a single op.  Right now, this does not account for multiple insns that
   could be generated for the single vector op.  We will handle that
   shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}

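/* Worked example (a sketch, assuming the default cost of 1 per
   vector_stmt): for ncopies = 2 and one constant operand, the above
   records inside_cost = 2 * 1 = 2 and outside_cost = 1, the latter
   covering the one-time materialization of the constant vector
   outside the loop.  */
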

/* Function vect_cost_strided_group_size.

   For strided load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size
   is only returned once per group.  */

static int
vect_cost_strided_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return DR_GROUP_SIZE (stmt_info);

  return 1;
}


/* Function vect_model_store_cost.

   Models cost for stores.  In the case of strided accesses, one access
   has the overhead of the strided access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       enum vect_def_type dt, slp_tree slp_node)
{
  int group_size;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Strided access?  */
  if (DR_GROUP_FIRST_DR (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          group_size = 1;
        }
      else
        {
          first_stmt = DR_GROUP_FIRST_DR (stmt_info);
          group_size = vect_cost_strided_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* Is this an access in a group of stores, which provide strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);
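      /* E.g. interleaving a group of 4 stores costs
         exact_log2 (4) * 4 = 8 vector_stmt operations per copy on top
         of the stores themselves (illustrative numbers, default cost
         model assumed).  */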

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);

    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                               vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost.

   Models cost for loads.  In the case of strided accesses, the last access
   has the overhead of the strided access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Strided accesses?  */
  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  if (first_stmt && !slp_node)
    {
      group_size = vect_cost_strided_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a strided access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* Is this an access in a group of loads providing strided access?
     If so, add in the cost of the permutes.  */
  if (group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2 (group_size) * group_size
                    * vect_get_stmt_cost (vector_stmt);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  vect_get_load_cost (first_dr, ncopies,
         ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node),
         &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                               vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide strided
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

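        /* Rough shape of the software-pipelined sequence (a sketch;
           the exact stmts are generated elsewhere in the vectorizer):

             msq = *(floor (addr));             # prolog load
             magic = mask_for_load (addr);      # if the target has one
           loop:
             lsq = *(floor (addr + VS - 1));    # load inside the loop
             vec = realign_load (msq, lsq, magic);
             msq = lsq;                         # feeds the next iteration
        */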
        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vector_stmt));
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
   is not NULL.  Otherwise, place the initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

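/* For instance, broadcasting the constant 3 into a V4SI vector emits
   (sketch; the SSA name is invented for illustration):

     cst_1 = { 3, 3, 3, 3 };

   in the loop preheader, and 'cst_1' is the returned vector def.  */
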
tree
vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
                  gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);

  if (gsi)
    vect_finish_stmt_generation (stmt, init_stmt, gsi);
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
    }

  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

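/* E.g. for the scalar stmt  S: a_1 = b_2 + 3  (a sketch): asking for the
   vector def of operand '3' (vect_constant_def) materializes a new
   {3,3,3,3} vector via vect_init_vector, whereas asking for 'b_2'
   (vect_internal_def) simply returns the lhs of the stmt that
   vectorized b_2's definition.  */
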
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
                                      &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vec_cst = build_vector (vector_type, t);
        return vect_init_vector (stmt, vec_cst, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
        vec_oprnd = PHI_RESULT (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy.

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a
   "copy" created in case the vectorized result cannot fit in one vector,
   and several copies of the vector-stmt are required.  In this case the
   vector-def is retrieved from the vector stmt recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of
   the vector stmt (each computing a vector of 'nunits' results, and
   together computing 'VF' results in each iteration).  This function is
   called when vectorizing such a stmt (e.g. vectorizing S2 in the
   illustration below, in which VF=16 and nunits=4, so the number of
   copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It
   is obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy -
   'VS1.1', and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3
   times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an
   original stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
   NULL.  */

static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
                   slp_tree slp_node)
{
  if (slp_node)
    vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1, -1);
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
}

/* Checks if CALL can be vectorized with result type VECTYPE_OUT and
   argument type VECTYPE_IN.  Returns a function declaration if the
   target has a vectorized version of the function, or NULL_TREE if the
   function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}

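/* For instance, on targets that provide one, a call to a sqrt builtin
   in a loop vectorized with V2DF may be mapped by the hook above to a
   V2DF square-root builtin; the mapping is entirely target-defined
   (illustrative example).  */
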
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than two arguments; we do not have
     interesting builtin functions to vectorize with more than two
     arguments.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 2)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
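  /* E.g. with a vectorization factor of 8: a call taking V8HI inputs
     and producing V8HI results needs no modifier (NONE); V8HI in,
     V4SI out means fewer but wider elements per vector (WIDEN);
     V4SI in, V8HI out is the opposite (NARROW).  Element counts are
     illustrative.  */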
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform operation.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                  fold_convert (type, integer_zero_node));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}


/* Function vect_gen_widened_results_half.

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
1634
1635
b8698a0f
L
1636/* Check if STMT performs a conversion operation, that can be vectorized.
1637 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
ebfd146a
IR
1638 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1639 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1640
1641static bool
1642vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1643 gimple *vec_stmt, slp_tree slp_node)
1644{
1645 tree vec_dest;
1646 tree scalar_dest;
1647 tree op0;
1648 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1649 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1651 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1652 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1653 tree new_temp;
1654 tree def;
1655 gimple def_stmt;
1656 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1657 gimple new_stmt = NULL;
1658 stmt_vec_info prev_stmt_info;
1659 int nunits_in;
1660 int nunits_out;
1661 tree vectype_out, vectype_in;
1662 int ncopies, j;
b690cc0f 1663 tree rhs_type;
ebfd146a
IR
1664 tree builtin_decl;
1665 enum { NARROW, NONE, WIDEN } modifier;
1666 int i;
1667 VEC(tree,heap) *vec_oprnds0 = NULL;
1668 tree vop0;
ebfd146a
IR
1669 VEC(tree,heap) *dummy = NULL;
1670 int dummy_int;
1671
1672 /* Is STMT a vectorizable conversion? */
1673
a70d6342
IR
1674 /* FORNOW: unsupported in basic block SLP. */
1675 gcc_assert (loop_vinfo);
b8698a0f 1676
ebfd146a
IR
1677 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1678 return false;
1679
8644a673 1680 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
ebfd146a
IR
1681 return false;
1682
1683 if (!is_gimple_assign (stmt))
1684 return false;
1685
1686 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1687 return false;
1688
1689 code = gimple_assign_rhs_code (stmt);
1690 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1691 return false;
1692
  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);
  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  /* FORNOW */
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

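  /* For example, a FLOAT_EXPR from V4SI to V4SF keeps the number of
     elements (NONE); a FLOAT_EXPR from V4SI to V2DF halves it (WIDEN:
     two result vectors per input vector); a FIX_TRUNC_EXPR from V2DF
     to V4SI doubles it (NARROW: two input vectors per result).  */
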
  if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* FORNOW: SLP with multiple types is not supported.  The SLP analysis
     verifies this, so we can safely override NCOPIES with 1 here.  */
  if (slp_node)
    ncopies = 1;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

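  /* E.g., for a WIDEN conversion from V4SI to V2DF with a vectorization
     factor of 8, ncopies is 8 / 4 == 2: each copy consumes one V4SI
     vector and produces two V2DF halves.  */
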
  /* Supportable by target?  */
  if ((modifier == NONE
       && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
      || (modifier == WIDEN
          && !supportable_widening_operation (code, stmt,
                                              vectype_out, vectype_in,
                                              &decl1, &decl2,
                                              &code1, &code2,
                                              &dummy_int, &dummy))
      || (modifier == NARROW
          && !supportable_narrowing_operation (code, vectype_out, vectype_in,
                                               &code1, &dummy_int, &dummy)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;
    }

  if (modifier != NONE)
    {
      /* FORNOW: SLP not supported.  */
      if (STMT_SLP_TYPE (stmt_info))
        return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  if (modifier == NONE && !slp_node)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          builtin_decl =
            targetm.vectorize.builtin_conversion (code,
                                                  vectype_out, vectype_in);
          for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
            {
              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_call (builtin_decl, 1, vop0);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
          else
            vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);

          /* Generate first half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code1, decl1,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);

          /* Generate second half of the widened result:  */
          new_stmt
            = vect_gen_widened_results_half (code2, decl2,
                                             vec_oprnd0, vec_oprnd1,
                                             unary_op, vec_dest, gsi, stmt);
          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
            }

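          /* Each narrowed result consumes two consecutive vector defs of
             OP0, so the def chain is advanced twice per copy: the second
             operand of copy J becomes the predecessor of the first operand
             of copy J+1.  */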
          /* Arguments are ready.  Create the new vector stmt.  */
          new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
                                                   vec_oprnd1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
    }

  if (vec_oprnds0)
    VEC_free (tree, heap, vec_oprnds0);

  return true;
}

/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  VEC(tree,heap) *vec_oprnds = NULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;
  tree vectype_in;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if (CONVERT_EXPR_CODE_P (code)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

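  /* E.g., a signed-to-unsigned integer cast keeps both the element count
     and the vector size, so it can be vectorized as a plain copy through
     a VIEW_CONVERT_EXPR of the vector operand (see the transform below).  */
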
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
        {
          if (CONVERT_EXPR_CODE_P (code))
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}

/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = false;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* If this is a shift/rotate, determine whether the shift amount is a vector
     or a scalar.  If the shift/rotate amount is a vector, use the
     vector/vector shift optabs.  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      /* Vector shifted by vector.  */
      if (dt[1] == vect_internal_def)
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector/vector shift/rotate found.");
        }

      /* See if the machine has a vector shifted by scalar insn, and if not
         then see if it has a vector shifted by vector insn.  */
      else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
        {
          optab = optab_for_tree_code (code, vectype, optab_scalar);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = true;
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector/scalar shift/rotate found.");
            }
          else
            {
              optab = optab_for_tree_code (code, vectype, optab_vector);
              if (optab
                  && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
                      != CODE_FOR_nothing))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "vector/vector shift/rotate found.");

                  /* Unlike the other binary operators, shifts/rotates have
                     the rhs being int, instead of the same type as the lhs,
                     so make sure the scalar is the right type if we are
                     dealing with vectors of short/char.  */
                  if (dt[1] == vect_constant_def)
                    op1 = fold_convert (TREE_TYPE (vectype), op1);
                }
            }
        }

      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "operand mode requires invariant argument.");
          return false;
        }
    }
  else
    optab = optab_for_tree_code (code, vectype, optab_default);

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode)->insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }
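
  /* Falling through here means the vector fits in a single word-sized
     integer register, so the operation (e.g. a bitwise one) can still be
     carried out in word mode; that is only worthwhile for a large enough
     vectorization factor, which the analysis check above enforces.  */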

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
     created in the previous stages of the recursion, so no allocation is
     needed, except for the case of shift with scalar shift argument.  In that
     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
     In case of loop-based vectorization we allocate VECs of size 1.  We
     allocate VEC_OPRNDS1 only in case of binary operation.  */
  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap, 1);
      if (op_type == binary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else if (scalar_shift_arg)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op && scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or
                 loop invariant op1 directly, without extending it to vector
                 mode first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar type
             (a special case for certain kinds of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (op_type == binary_op && !vec_oprnd1)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
        {
          vop1 = ((op_type == binary_op)
                  ? VEC_index (tree, vec_oprnds1, i) : NULL);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds0);
  if (vec_oprnds1)
    VEC_free (tree, heap, vec_oprnds1);

  return true;
}


/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
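
/* Each invocation above pushes two vector defs, so a call with
   MULTI_STEP_CVT == N collects 2 * (N + 1) defs in VEC_OPRNDS; e.g. a call
   with MULTI_STEP_CVT == 1 recurses once and yields four defs in total.  */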


/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in a demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at
         the previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             code, prev_stmt_info);
    }
}

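/* For example, demoting V4SI to V16QI is a two-step narrowing: four V4SI
   operands are first packed pairwise into two V8HI vectors, and the
   recursive call then packs those into the single V16QI result.  */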

/* Function vectorizable_type_demotion

   Check if STMT performs a binary or unary operation that involves
   type demotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
                            gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  tree vectype_in;
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_oprnds0 = NULL;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
  tree last_oprnd, intermediate_type;

  /* FORNOW: not supported by basic block SLP vectorization.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable type-demotion operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Check the operands of the operation.  */
  op0 = gimple_assign_rhs1 (stmt);
  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
             && CONVERT_EXPR_CODE_P (code))))
    return false;
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external def, use a vector type with the
     same size as the output vector type, if possible.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in >= nunits_out)
    return false;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  gcc_assert (ncopies >= 1);

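  /* Note that NCOPIES is computed from NUNITS_OUT here: e.g., with VF == 16,
     a V16QI output and V4SI inputs, a single copy already consumes all four
     input vectors, so ncopies == 1.  */
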
  /* Supportable by target?  */
  if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
                                        &code1, &multi_step_cvt, &interm_types))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_demotion ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
             ncopies);

  /* In case of multi-step demotion, we first generate demotion operations to
     the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate types (TYPES) received
     from supportable_narrowing_operation, and store them in the correct order
     for future use in vect_create_vectorized_demotion_stmts().  */
  if (multi_step_cvt)
    vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  else
    vec_dsts = VEC_alloc (tree, heap, 1);

  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          VEC_quick_push (tree, vec_dsts, vec_dest);
        }
    }

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */
  last_oprnd = op0;
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (slp_node)
        vect_get_slp_defs (slp_node, &vec_oprnds0, NULL, -1);
      else
        {
          VEC_free (tree, heap, vec_oprnds0);
          vec_oprnds0 = VEC_alloc (tree, heap,
                       (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
          vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                    vect_pow2 (multi_step_cvt) - 1);
        }

      /* Arguments are ready.  Create the new vector stmts.  */
      tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
      vect_create_vectorized_demotion_stmts (&vec_oprnds0,
                                             multi_step_cvt, stmt,
                                             tmp_vec_dsts, gsi, slp_node,
                                             code1, &prev_stmt_info);
    }

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, tmp_vec_dsts);
  VEC_free (tree, heap, interm_types);

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}


/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
                                        VEC (tree, heap) **vec_oprnds1,
                                        int multi_step_cvt, gimple stmt,
                                        VEC (tree, heap) *vec_dsts,
                                        gimple_stmt_iterator *gsi,
                                        slp_tree slp_node, enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type,
                                        stmt_vec_info *prev_stmt_info)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
  gimple new_stmt1, new_stmt2;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  VEC (tree, heap) *vec_tmp;

  vec_dest = VEC_pop (tree, vec_dsts);
  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);

  for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
    {
      if (op_type == binary_op)
        vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      if (multi_step_cvt)
        {
          /* Store the results for the recursive call.  */
          VEC_quick_push (tree, vec_tmp, new_tmp1);
          VEC_quick_push (tree, vec_tmp, new_tmp2);
        }
      else
        {
          /* Last step of promotion sequence - store the results.  */
          if (slp_node)
            {
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
              VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
            }
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;

              *prev_stmt_info = vinfo_for_stmt (new_stmt1);
              STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
              *prev_stmt_info = vinfo_for_stmt (new_stmt2);
            }
        }
    }

  if (multi_step_cvt)
    {
      /* For a multi-step promotion operation we call the function recursively
         for every stage: starting from the input type, we create promotion
         operations to the intermediate types, and then create promotions
         to the output type.  */
      *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
      VEC_free (tree, heap, vec_tmp);
      vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
                                              multi_step_cvt - 1, stmt,
                                              vec_dsts, gsi, slp_node, code1,
                                              code2, decl1, decl2, op_type,
                                              prev_stmt_info);
    }
}

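/* For example, promoting V16QI to V4SI mirrors the demotion case: each V16QI
   operand unpacks into a low and a high V8HI half, and the recursive call
   unpacks each of those into two V4SI vectors, producing four result vectors
   per input vector.  */
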
/* Function vectorizable_type_promotion

   Check if STMT performs a binary or unary operation that involves
   type promotion, and if it can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
                             gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  int op_type;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  tree vectype_in;
  tree intermediate_type = NULL_TREE;
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;

  /* FORNOW: not supported by basic block SLP vectorization.  */
  gcc_assert (loop_vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable type-promotion operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != WIDEN_MULT_EXPR)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Check the operands of the operation.  */
  op0 = gimple_assign_rhs1 (stmt);
  if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
          && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
         || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
             && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
             && CONVERT_EXPR_CODE_P (code))))
    return false;
  if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in <= nunits_out)
    return false;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  op_type = TREE_CODE_LENGTH (code);
  if (op_type == binary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* Supportable by target?  */
  if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                       &decl1, &decl2, &code1, &code2,
                                       &multi_step_cvt, &interm_types))
    return false;

  /* Binary widening operation can only be supported directly by the
     architecture.  */
  gcc_assert (!(multi_step_cvt && op_type == binary_op));

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_promotion ===");
      vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
             ncopies);

  /* Handle def.  */
  /* In case of multi-step promotion, we first generate promotion operations
     to the intermediate types, and then from those types to the final one.
     We store vector destinations in VEC_DSTS in the correct order for
     recursive creation of promotion operations in
     vect_create_vectorized_promotion_stmts().  Vector destinations are
     created according to TYPES received from
     supportable_widening_operation().  */
  if (multi_step_cvt)
    vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  else
    vec_dsts = VEC_alloc (tree, heap, 1);

  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    {
      for (i = VEC_length (tree, interm_types) - 1;
           VEC_iterate (tree, interm_types, i, intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          VEC_quick_push (tree, vec_dsts, vec_dest);
        }
    }

  if (!slp_node)
    {
      vec_oprnds0 = VEC_alloc (tree, heap,
                               (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
      if (op_type == binary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (slp_node)
            vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1, -1);
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
              if (op_type == binary_op)
                {
                  vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                }
            }
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
          VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
          if (op_type == binary_op)
            {
              vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
              VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
            }
        }

      /* Arguments are ready.  Create the new vector stmts.  */
      tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
      vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
                                              multi_step_cvt, stmt,
                                              tmp_vec_dsts,
                                              gsi, slp_node, code1, code2,
                                              decl1, decl2, op_type,
                                              &prev_stmt_info);
    }

  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, tmp_vec_dsts);
  VEC_free (tree, heap, interm_types);
  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  return true;
}


/* Function vectorizable_store.

   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

3034
3035static bool
3036vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3037 slp_tree slp_node)
3038{
3039 tree scalar_dest;
3040 tree data_ref;
3041 tree op;
3042 tree vec_oprnd = NULL_TREE;
3043 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3044 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3045 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3046 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 3047 struct loop *loop = NULL;
ebfd146a
IR
3048 enum machine_mode vec_mode;
3049 tree dummy;
3050 enum dr_alignment_support alignment_support_scheme;
3051 tree def;
3052 gimple def_stmt;
3053 enum vect_def_type dt;
3054 stmt_vec_info prev_stmt_info = NULL;
3055 tree dataref_ptr = NULL_TREE;
3056 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3057 int ncopies;
3058 int j;
3059 gimple next_stmt, first_stmt = NULL;
3060 bool strided_store = false;
3061 unsigned int group_size, i;
3062 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3063 bool inv_p;
3064 VEC(tree,heap) *vec_oprnds = NULL;
3065 bool slp = (slp_node != NULL);
ebfd146a 3066 unsigned int vec_num;
a70d6342
IR
3067 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3068
3069 if (loop_vinfo)
3070 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
3071
3072 /* Multiple types in SLP are handled by creating the appropriate number of
3073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3074 case of SLP. */
3075 if (slp)
3076 ncopies = 1;
3077 else
3078 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3079
3080 gcc_assert (ncopies >= 1);
3081
3082 /* FORNOW. This restriction should be relaxed. */
a70d6342 3083 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
ebfd146a
IR
3084 {
3085 if (vect_print_dump_info (REPORT_DETAILS))
3086 fprintf (vect_dump, "multiple types in nested loop.");
3087 return false;
3088 }
3089
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* The scalar rhs type needs to be trivially convertible to the vector
     component type.  This should always be the case.  */
  if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "??? operands of different types");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW.  In some cases we can vectorize even if the data-type is not
     supported (e.g., array initialization with 0).  */
  if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_store = true;
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
      if (!vect_strided_store_supported (vectype)
          && !PURE_SLP_STMT (stmt_info) && !slp)
        return false;

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = DR_GROUP_NEXT_DR (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
                                       &def, &dt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "use not simple.");
                  return false;
                }
              next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (strided_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));

      DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

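      /* For a group of size N, the first N-1 stores reaching this point are
         therefore no-ops; all N vector stores are emitted together when the
         last scalar store of the group is processed.  */
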
      if (slp)
        {
          strided_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);

  dr_chain = VEC_alloc (tree, heap, group_size);
  oprnds = VEC_alloc (tree, heap, group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);

3224
3225 /* In case the vectorization factor (VF) is bigger than the number
3226 of elements that we can fit in a vectype (nunits), we have to generate
3227 more than one vector stmt - i.e - we need to "unroll" the
b8698a0f 3228 vector stmt by a factor VF/nunits. For more details see documentation in
ebfd146a
IR
3229 vect_get_vec_def_for_copy_stmt. */
3230
3231 /* In case of interleaving (non-unit strided access):
3232
3233 S1: &base + 2 = x2
3234 S2: &base = x0
3235 S3: &base + 1 = x1
3236 S4: &base + 3 = x3
3237
3238 We create vectorized stores starting from base address (the access of the
3239 first stmt in the chain (S2 in the above example), when the last store stmt
3240 of the chain (S4) is reached:
3241
3242 VS1: &base = vx2
3243 VS2: &base + vec_size*1 = vx0
3244 VS3: &base + vec_size*2 = vx1
3245 VS4: &base + vec_size*3 = vx3
3246
3247 Then permutation statements are generated:
3248
3249 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3250 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3251 ...
b8698a0f 3252
ebfd146a
IR
3253 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3254 (the order of the data-refs in the output of vect_permute_store_chain
3255 corresponds to the order of scalar stmts in the interleaving chain - see
3256 the documentation of vect_permute_store_chain()).
3257
3258 In case of both multiple types and interleaving, above vector stores and
3259 permutation stmts are created for every copy. The result vector stmts are
3260 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
b8698a0f 3261 STMT_VINFO_RELATED_STMT for the next copies.
ebfd146a
IR
3262 */
3263
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;
      gimple ptr_incr;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_slp_defs (slp_node, &vec_oprnds, NULL, -1);

              vec_oprnd = VEC_index (tree, vec_oprnds, 0);
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all
                 the stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is
                 then used as an input to vect_permute_store_chain(), and
                 OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
                 the next copy.

                 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  VEC_quick_push (tree, dr_chain, vec_oprnd);
                  VEC_quick_push (tree, oprnds, vec_oprnd);
                  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
                }
            }

	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
						  &dummy, &ptr_incr, false,
						  &inv_p);
	  gcc_assert (bb_vinfo || !inv_p);
	}
      else
	{
	  /* For interleaved stores we created vectorized defs for all the
	     defs stored in OPRNDS in the previous iteration (previous copy).
	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
	     the next copy.
	     If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
	     OPRNDS are of size 1.  */
	  for (i = 0; i < group_size; i++)
	    {
	      op = VEC_index (tree, oprnds, i);
	      vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
				  &dt);
	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
	      VEC_replace (tree, dr_chain, i, vec_oprnd);
	      VEC_replace (tree, oprnds, i, vec_oprnd);
	    }
	  dataref_ptr =
	    bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
	}

      if (strided_store)
	{
	  result_chain = VEC_alloc (tree, heap, group_size);
	  /* Permute.  */
	  if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
					 &result_chain))
	    return false;
	}

      next_stmt = first_stmt;
      for (i = 0; i < vec_num; i++)
	{
	  if (i > 0)
	    /* Bump the vector pointer.  */
	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					   NULL_TREE);

	  if (slp)
	    vec_oprnd = VEC_index (tree, vec_oprnds, i);
	  else if (strided_store)
	    /* For strided stores vectorized defs are interleaved in
	       vect_permute_store_chain().  */
	    vec_oprnd = VEC_index (tree, result_chain, i);

	  if (aligned_access_p (first_dr))
	    data_ref = build_simple_mem_ref (dataref_ptr);
	  else
	    {
	      int mis = DR_MISALIGNMENT (first_dr);
	      tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
	      tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
	      data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr,
				 tmis);
	    }

	  /* If accesses through a pointer to vectype do not alias the original
	     memory reference we have a problem.  This should never happen.  */
	  gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
		      get_alias_set (gimple_assign_lhs (stmt))));

	  /* Arguments are ready.  Create the new vector stmt.  */
	  new_stmt = gimple_build_assign (data_ref, vec_oprnd);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  if (slp)
	    continue;

	  if (j == 0)
	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	  else
	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

	  prev_stmt_info = vinfo_for_stmt (new_stmt);
	  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
	  if (!next_stmt)
	    break;
	}
    }

  VEC_free (tree, heap, dr_chain);
  VEC_free (tree, heap, oprnds);
  if (result_chain)
    VEC_free (tree, heap, result_chain);

  return true;
}

/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  gimple ptr_incr;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  VEC(tree,heap) *dr_chain = NULL;
  bool strided_load = false;
  gimple first_stmt;
  tree scalar_type;
  bool inv_p;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load? */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  scalar_type = TREE_TYPE (DR_REF (dr));
  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  /* The vector component type needs to be trivially convertible to the
     scalar lhs.  This should always be the case.  */
  if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "??? operands of different types");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      strided_load = true;
      /* FORNOW */
      gcc_assert (! nested_in_vect_loop);

      /* Check if interleaving is supported.  */
      if (!vect_strided_load_supported (vectype)
	  && !PURE_SLP_STMT (stmt_info) && !slp)
	return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  /** Transform.  **/

  if (strided_load)
    {
      first_stmt = DR_GROUP_FIRST_DR (stmt_info);
      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
	{
	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	  return true;
	}
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
	{
	  strided_load = false;
	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	  if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
	    slp_perm = true;
	}
      else
	vec_num = group_size;

      dr_chain = VEC_alloc (tree, heap, vec_num);
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See the documentation of vect_get_vec_def_for_stmt_copy for how the
     information recorded in the RELATED_STMT field is used to vectorize
     stmt S2.  */

  /* In case of interleaving (non-unit strided access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
     VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
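
  /* For illustration only (not part of the original sources): with
     group_size == 2 and V4SI vectors, a chain loading interleaved pairs
     {x,y} yields vx0 = {x0,y0,x1,y1} and vx1 = {x2,y2,x3,y3}.  Then
       VEC_EXTRACT_EVEN_EXPR <vx0, vx1> = {x0,x1,x2,x3}
       VEC_EXTRACT_ODD_EXPR  <vx0, vx1> = {y0,y1,y2,y3}
     de-interleave the chain, so each scalar stmt of the group ends up
     with a vector of its own elements.  */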

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:
         p = initial_addr;
         indx = 0;
         loop {
           p = p + indx * vectype_size;
           vec_dest = *(p);
           indx = indx + 1;
         }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:
         p1 = initial_addr;
         msq_init = *(floor(p1))
         p2 = initial_addr + VS - 1;
         realignment_token = call target_builtin;
         indx = 0;
         loop {
           p2 = p2 + indx * vectype_size
           lsq = *(floor(p2))
           vec_dest = realign_load (msq, lsq, realignment_token)
           indx = indx + 1;
           msq = lsq;
         }   */
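
  /* For illustration only (not part of the original sources): with 16-byte
     vectors and initial_addr == 0x1004 (misaligned by 4 bytes), msq is
     loaded from the aligned address 0x1000 and lsq from 0x1010;
     realign_load then extracts the 16 bytes starting at offset 4 from the
     concatenation {msq,lsq}.  In the dr_explicit_realign_optimized scheme
     only one aligned load is issued per iteration in steady state, because
     the next msq simply reuses the current lsq.  */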

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (loop && nested_in_vect_loop_p (loop, stmt)
      && (TREE_INT_CST_LOW (DR_STEP (dr))
	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
				    alignment_support_scheme, NULL_TREE,
				    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
	{
	  phi = SSA_NAME_DEF_STMT (msq);
	  offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
	}
    }
  else
    at_loop = loop;

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector pointer update chain.  */
      if (j == 0)
	dataref_ptr = vect_create_data_ref_ptr (first_stmt,
						at_loop, offset,
						&dummy, &ptr_incr, false,
						&inv_p);
      else
	dataref_ptr =
	  bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);

      for (i = 0; i < vec_num; i++)
	{
	  if (i > 0)
	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
					   NULL_TREE);

	  /* 2. Create the vector-load in the loop.  */
	  switch (alignment_support_scheme)
	    {
	    case dr_aligned:
	      gcc_assert (aligned_access_p (first_dr));
	      data_ref = build_simple_mem_ref (dataref_ptr);
	      break;
	    case dr_unaligned_supported:
	      {
		int mis = DR_MISALIGNMENT (first_dr);
		tree tmis = (mis == -1 ? size_zero_node : size_int (mis));

		tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
		data_ref =
		  build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
		break;
	      }
	    case dr_explicit_realign:
	      {
		tree ptr, bump;
		tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

		if (compute_in_loop)
		  msq = vect_setup_realignment (first_stmt, gsi,
						&realignment_token,
						dr_explicit_realign,
						dataref_ptr, NULL);

		data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
		vec_dest = vect_create_destination_var (scalar_dest, vectype);
		new_stmt = gimple_build_assign (vec_dest, data_ref);
		new_temp = make_ssa_name (vec_dest, new_stmt);
		gimple_assign_set_lhs (new_stmt, new_temp);
		gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		vect_finish_stmt_generation (stmt, new_stmt, gsi);
		msq = new_temp;

		bump = size_binop (MULT_EXPR, vs_minus_1,
				   TYPE_SIZE_UNIT (scalar_type));
		ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
		data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
		break;
	      }
	    case dr_explicit_realign_optimized:
	      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  /* If accesses through a pointer to vectype do not alias the original
	     memory reference we have a problem.  This should never happen.  */
	  gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
		      get_alias_set (gimple_assign_rhs1 (stmt))));
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  new_stmt = gimple_build_assign (vec_dest, data_ref);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
	  mark_symbols_for_renaming (new_stmt);

	  /* 3. Handle explicit realignment if necessary/supported.  Create in
		loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
	  if (alignment_support_scheme == dr_explicit_realign_optimized
	      || alignment_support_scheme == dr_explicit_realign)
	    {
	      tree tmp;

	      lsq = gimple_assign_lhs (new_stmt);
	      if (!realignment_token)
		realignment_token = dataref_ptr;
	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
	      tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
			    realignment_token);
	      new_stmt = gimple_build_assign (vec_dest, tmp);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (stmt, new_stmt, gsi);

	      if (alignment_support_scheme == dr_explicit_realign_optimized)
		{
		  gcc_assert (phi);
		  if (i == vec_num - 1 && j == ncopies - 1)
		    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
				 UNKNOWN_LOCATION);
		  msq = lsq;
		}
	    }

	  /* 4. Handle invariant-load.  */
	  if (inv_p && !bb_vinfo)
	    {
	      gcc_assert (!strided_load);
	      gcc_assert (nested_in_vect_loop_p (loop, stmt));
	      if (j == 0)
		{
		  int k;
		  tree t = NULL_TREE;
		  tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);

		  /* CHECKME: bitpos depends on endianness?  */
		  bitpos = bitsize_zero_node;
		  vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
				    bitsize, bitpos);
		  vec_dest =
		    vect_create_destination_var (scalar_dest, NULL_TREE);
		  new_stmt = gimple_build_assign (vec_dest, vec_inv);
		  new_temp = make_ssa_name (vec_dest, new_stmt);
		  gimple_assign_set_lhs (new_stmt, new_temp);
		  vect_finish_stmt_generation (stmt, new_stmt, gsi);

		  for (k = nunits - 1; k >= 0; --k)
		    t = tree_cons (NULL_TREE, new_temp, t);
		  /* FIXME: use build_constructor directly.  */
		  vec_inv = build_constructor_from_list (vectype, t);
		  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
		}
	      else
		gcc_unreachable (); /* FORNOW. */
	    }

	  /* Collect vector loads and later create their permutation in
	     vect_transform_strided_load ().  */
	  if (strided_load || slp_perm)
	    VEC_quick_push (tree, dr_chain, new_temp);

	  /* Store vector loads in the corresponding SLP_NODE.  */
	  if (slp && !slp_perm)
	    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
	}

      if (slp && !slp_perm)
	continue;

      if (slp_perm)
	{
	  if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
					     slp_node_instance, false))
	    {
	      VEC_free (tree, heap, dr_chain);
	      return false;
	    }
	}
      else
	{
	  if (strided_load)
	    {
	      if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
		return false;

	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
	      VEC_free (tree, heap, dr_chain);
	      dr_chain = VEC_alloc (tree, heap, group_size);
	    }
	  else
	    {
	      if (j == 0)
		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
	      else
		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
	      prev_stmt_info = vinfo_for_stmt (new_stmt);
	    }
	}
    }

  if (dr_chain)
    VEC_free (tree, heap, dr_chain);

  return true;
}

/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */

static bool
vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
			       &dt))
	return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
	   && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
			       &dt))
	return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
	   && TREE_CODE (rhs) != FIXED_CST)
    return false;

  return true;
}
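
/* For illustration only (not part of the original sources): a condition
   such as "a_1 < b_2" or "a_1 <= 5" passes vect_is_simple_cond when each
   SSA-name operand has a def that vect_is_simple_use accepts; anything
   that is not a comparison, or whose operand defs cannot be classified
   (vect_unknown_def_type), is rejected.  */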

/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
			gimple *vec_stmt, tree reduc_def, int reduc_index)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree op = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree def;
  enum vect_def_type dt;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  enum tree_code code;

  /* FORNOW: unsupported in basic block SLP.  */
  gcc_assert (loop_vinfo);

  gcc_assert (ncopies >= 1);
  if (ncopies > 1)
    return false; /* FORNOW */

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
	   && reduc_def))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  cond_expr = TREE_OPERAND (op, 0);
  then_clause = TREE_OPERAND (op, 1);
  else_clause = TREE_OPERAND (op, 2);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
    return false;

  /* We do not handle two different vector types for the condition
     and the values.  */
  if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
			   TREE_TYPE (vectype)))
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST
	   && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST
	   && TREE_CODE (else_clause) != FIXED_CST)
    return false;


  vec_mode = TYPE_MODE (vectype);

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
    }

  /* Transform.  */

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  vec_cond_lhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
  vec_cond_rhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
  if (reduc_index == 1)
    vec_then_clause = reduc_def;
  else
    vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
  if (reduc_index == 2)
    vec_else_clause = reduc_def;
  else
    vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);

  /* Arguments are ready.  Create the new vector stmt.  */
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
			vec_cond_lhs, vec_cond_rhs);
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
			  vec_compare, vec_then_clause, vec_else_clause);

  *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  gimple_assign_set_lhs (*vec_stmt, new_temp);
  vect_finish_stmt_generation (stmt, *vec_stmt, gsi);

  return true;
}
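
/* For illustration only (not part of the original sources): a scalar stmt
     x_5 = a_1 < b_2 ? c_3 : d_4;
   is transformed above into a single vector stmt
     vx_5 = VEC_COND_EXPR <va_1 < vb_2, vc_3, vd_4>;
   where each vector operand comes from vect_get_vec_def_for_operand
   (or from REDUC_DEF when vectorizing a nested cycle).  */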


/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "==> examining statement: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	fprintf (vect_dump, "not vectorized: stmt has volatile operands");

      return false;
    }

  /* Skip stmts that do not need to be vectorized. In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "irrelevant.");

      return true;
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
    case vect_internal_def:
      break;

    case vect_reduction_def:
    case vect_nested_cycle:
      gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
		  || relevance == vect_used_in_outer_by_reduction
		  || relevance == vect_unused_in_scope));
      break;

    case vect_induction_def:
    case vect_constant_def:
    case vect_external_def:
    case vect_unknown_def_type:
    default:
      gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "get vectype for scalar type: ");
	  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
	}

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "not SLPed: unsupported data-type ");
	      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
	    }
	  return false;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "vectype: ");
	  print_generic_expr (vect_dump, vectype, TDF_SLIM);
	}

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
	  || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
	  || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
	  || vectorizable_conversion (stmt, NULL, NULL, NULL)
	  || vectorizable_operation (stmt, NULL, NULL, NULL)
	  || vectorizable_assignment (stmt, NULL, NULL, NULL)
	  || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
	  || vectorizable_call (stmt, NULL, NULL)
	  || vectorizable_store (stmt, NULL, NULL, NULL)
	  || vectorizable_reduction (stmt, NULL, NULL, NULL)
	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
  else
    {
      if (bb_vinfo)
	ok = (vectorizable_operation (stmt, NULL, NULL, node)
	      || vectorizable_assignment (stmt, NULL, NULL, node)
	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
	      || vectorizable_store (stmt, NULL, NULL, node));
    }

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	{
	  fprintf (vect_dump, "not vectorized: relevant stmt not ");
	  fprintf (vect_dump, "supported: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	{
	  fprintf (vect_dump, "not vectorized: live stmt not ");
	  fprintf (vect_dump, "supported: ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      return false;
    }

  if (!PURE_SLP_STMT (stmt_info))
    {
      /* Groups of strided accesses whose size is not a power of 2 are not
	 vectorizable yet using loop-vectorization. Therefore, if this stmt
	 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
	 loop-based vectorized), the loop cannot be vectorized.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
	  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
			 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "not vectorized: the size of group "
		       "of strided accesses is not a power of 2");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  return false;
	}
    }

  return true;
}
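
/* For illustration only (not part of the original sources): during
   analysis the vectorizable_* routines above are called with a NULL
   vec_stmt, so they only check feasibility; e.g. "z_3 = x_1 + y_2" is
   accepted by vectorizable_operation, which also sets STMT_VINFO_TYPE to
   op_vec_info_type so that vect_transform_stmt below can dispatch on it
   during the transformation phase.  */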


/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at GSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
		     bool *strided_store, slp_tree slp_node,
		     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple orig_stmt_in_pattern;
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
      done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_promotion_vec_info_type:
      done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
				slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
	{
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *strided_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
	    is_store = true;
	}
      else
	is_store = true;
      break;

    case condition_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_call (stmt, gsi, &vec_stmt);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "stmt not supported.");
	  gcc_unreachable ();
	}
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
				  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
	  || STMT_VINFO_RELEVANT (stmt_info) ==
	     vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
				 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	scalar_dest = PHI_RESULT (stmt);
      else
	scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
	{
	  if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
	    {
	      exit_phi = USE_STMT (use_p);
	      STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
	    }
	}
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    {
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
      if (orig_stmt_in_pattern)
	{
	  stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
	  /* STMT was inserted by the vectorizer to replace a computation
	     idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original sequence
	     that computed this idiom.  We need to record a pointer to
	     VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
	     details in the documentation of vect_pattern_recog.  */
	  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
	    {
	      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
	      STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
	    }
	}
    }

  return is_store;
}


/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      gsi_remove (&next_si, true);
      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
      free_stmt_vec_info (next);
      next = tmp;
    }
}


/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
		   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = loop_vect;
  DR_GROUP_FIRST_DR (res) = NULL;
  DR_GROUP_NEXT_DR (res) = NULL;
  DR_GROUP_SIZE (res) = 0;
  DR_GROUP_STORE_COUNT (res) = 0;
  DR_GROUP_GAP (res) = 0;
  DR_GROUP_SAME_DR_STMT (res) = NULL;
  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}


/* Create a vector for stmt_vec_info.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}


/* Free the vector of stmt_vec_info.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}


/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
    return NULL_TREE;

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    return NULL_TREE;

  /* If we'd build a vector type of elements whose mode precision doesn't
     match their type's precision we'll get mismatched types on vector
     extracts via BIT_FIELD_REFs.  This effectively means we disable
     vectorization of bool and/or enum types in some languages.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
    return NULL_TREE;

  /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
     is expected.  */
  nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}
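
/* For illustration only (not part of the original sources): on a target
   where UNITS_PER_SIMD_WORD is 16 bytes, a 4-byte "int" gives
   nunits = 16/4 = 4 and hence a V4SI vector type, and an 8-byte "double"
   gives V2DF.  A C bool with TYPE_PRECISION 1 but an 8-bit mode is
   rejected by the precision check above.  */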

/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)
{
  return get_vectype_for_scalar_type (scalar_type);
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of a stmt in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and
   operands that are defined by the current iteration of the loop.
   Unsupportable operands are those that are defined by a previous
   iteration of the loop (as is the case in reduction/induction
   computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
		    bb_vec_info bb_vinfo, gimple *def_stmt,
		    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
	break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}
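
/* For illustration only (not part of the original sources): in a loop
   over i, for the stmt "t_1 = a_2 * 4" the operand a_2 classifies as
   vect_external_def if it is defined before the loop (a loop invariant),
   as vect_internal_def if it is defined by a stmt of the current
   iteration, and the literal 4 as vect_constant_def.  Callers use the
   returned vect_def_type to decide how to obtain the vector def.  */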

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use, but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}


/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
     when vectorizing the operation, if available.  In this case,
     CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
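
/* For illustration only (not part of the original sources): widening a
   vector of chars to ints is the two-step conversion char->short->int.
   For such a conversion on a little-endian target this function would
   return code1 = VEC_UNPACK_LO_EXPR, code2 = VEC_UNPACK_HI_EXPR,
   *multi_step_cvt = 1 and *interm_types holding the intermediate short
   vector type, provided each unpack step is supported by the target's
   optabs.  */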
ebfd146a
IR
4837
4838bool
b690cc0f
RG
4839supportable_widening_operation (enum tree_code code, gimple stmt,
4840 tree vectype_out, tree vectype_in,
ebfd146a
IR
4841 tree *decl1, tree *decl2,
4842 enum tree_code *code1, enum tree_code *code2,
4843 int *multi_step_cvt,
4844 VEC (tree, heap) **interm_types)
4845{
4846 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4847 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4848 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4849 bool ordered_p;
4850 enum machine_mode vec_mode;
81f40b79 4851 enum insn_code icode1, icode2;
ebfd146a 4852 optab optab1, optab2;
b690cc0f
RG
4853 tree vectype = vectype_in;
4854 tree wide_vectype = vectype_out;
ebfd146a
IR
4855 enum tree_code c1, c2;
4856
4857 /* The result of a vectorized widening operation usually requires two vectors
b8698a0f
L
4858 (because the widened results do not fit int one vector). The generated
4859 vector results would normally be expected to be generated in the same
ebfd146a
IR
4860 order as in the original scalar computation, i.e. if 8 results are
4861 generated in each vector iteration, they are to be organized as follows:
b8698a0f 4862 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
ebfd146a 4863
b8698a0f 4864 However, in the special case that the result of the widening operation is
ebfd146a 4865 used in a reduction computation only, the order doesn't matter (because
b8698a0f 4866 when vectorizing a reduction we change the order of the computation).
ebfd146a
IR
4867 Some targets can take advantage of this and generate more efficient code.
4868 For example, targets like Altivec, that support widen_mult using a sequence
4869 of {mult_even,mult_odd} generate the following vectors:
4870 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4871
4872 When vectorizing outer-loops, we execute the inner-loop sequentially
b8698a0f
L
4873 (each vectorized inner-loop iteration contributes to VF outer-loop
4874 iterations in parallel). We therefore don't allow to change the order
ebfd146a
IR
4875 of the computation in the inner-loop during outer-loop vectorization. */
4876
4877 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4878 && !nested_in_vect_loop_p (vect_loop, stmt))
4879 ordered_p = false;
4880 else
4881 ordered_p = true;
4882
4883 if (!ordered_p
4884 && code == WIDEN_MULT_EXPR
4885 && targetm.vectorize.builtin_mul_widen_even
4886 && targetm.vectorize.builtin_mul_widen_even (vectype)
4887 && targetm.vectorize.builtin_mul_widen_odd
4888 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4889 {
4890 if (vect_print_dump_info (REPORT_DETAILS))
4891 fprintf (vect_dump, "Unordered widening operation detected.");
4892
4893 *code1 = *code2 = CALL_EXPR;
4894 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4895 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4896 return true;
4897 }
4898
  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_MULT_HI_EXPR;
          c2 = VEC_WIDEN_MULT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_MULT_HI_EXPR;
          c1 = VEC_WIDEN_MULT_LO_EXPR;
        }
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_HI_EXPR;
          c2 = VEC_UNPACK_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_HI_EXPR;
          c1 = VEC_UNPACK_LO_EXPR;
        }
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_FLOAT_HI_EXPR;
          c2 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_FLOAT_HI_EXPR;
          c1 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

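  /* Both halves of the widening operation must be supported on the
     input vector mode.  */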
  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
         == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      int i;
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
        return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than
         MAX_INTERM_CVT_STEPS intermediate steps in the promotion sequence.
         We try MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we
         do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type
            = lang_hooks.types.type_for_mode (intermediate_mode,
                                              TYPE_UNSIGNED (prev_type));
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

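          /* The current pair of optabs must be supported out of PREV_MODE
             (and produce INTERMEDIATE_MODE), and the next pair out of
             INTERMEDIATE_MODE.  */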
          if (!optab3 || !optab4
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode2].operand[0].mode != intermediate_mode
              || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
                 == CODE_FOR_nothing
              || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
                 == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
              && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}
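
/* An illustrative sketch of querying the function above (the real callers
   live elsewhere in the vectorizer; the vector types, modes and local
   names used here are hypothetical and target dependent).  For a
   char->int conversion on a target with 128-bit vectors, assuming
   V16QI/V8HI/V4SI vector types and a scalar statement STMT:

     enum tree_code c1, c2;
     tree decl1 = NULL_TREE, decl2 = NULL_TREE;
     int multi_step = 0;
     VEC (tree, heap) *interm = NULL;

     if (supportable_widening_operation (NOP_EXPR, stmt,
                                         v4si_vectype, v16qi_vectype,
                                         &decl1, &decl2, &c1, &c2,
                                         &multi_step, &interm))
       ...

   On success C1/C2 are the VEC_UNPACK_LO/HI_EXPR codes (the even/odd
   builtin path applies only to WIDEN_MULT_EXPR), the promotion takes two
   steps (V16QI -> V8HI -> V4SI), MULTI_STEP is 1, and INTERM holds the
   intermediate V8HI vector type.  */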


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  int i;

  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

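  /* The packing operation must be supported directly on the input
     vector mode.  */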
  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
      == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
    {
      enum machine_mode intermediate_mode, prev_mode = vec_mode;

      *code1 = c1;
      prev_type = vectype;
      /* We assume here that there will not be more than
         MAX_INTERM_CVT_STEPS intermediate steps in the narrowing sequence.
         We try MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if
         we do not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type
            = lang_hooks.types.type_for_mode (intermediate_mode,
                                              TYPE_UNSIGNED (prev_type));
          interm_optab = optab_for_tree_code (c1, intermediate_type,
                                              optab_default);
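          /* The current step must be supported out of PREV_MODE by
             OPTAB1, and the next one out of INTERMEDIATE_MODE by
             INTERM_OPTAB.  */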
          if (!interm_optab
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode1
                  = interm_optab->handlers[(int) intermediate_mode].insn_code)
                 == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  return true;
}
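
/* An illustrative sketch, mirroring the widening example above (again
   the vector types and local names are hypothetical and target
   dependent).  For an int->char conversion with 128-bit vectors,
   assuming V4SI/V8HI/V16QI vector types, a caller might do:

     enum tree_code c1;
     int multi_step = 0;
     VEC (tree, heap) *interm = NULL;

     if (supportable_narrowing_operation (NOP_EXPR, v16qi_vectype,
                                          v4si_vectype, &c1,
                                          &multi_step, &interm))
       ...

   On success C1 is VEC_PACK_TRUNC_EXPR, the demotion takes two steps
   (V4SI -> V8HI -> V16QI), MULTI_STEP is 1, and INTERM holds the
   intermediate V8HI vector type.  */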