1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "toplev.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42
43
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
45
46 /* Function vect_mark_relevant.
47
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
49
50 static void
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
53 {
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
57
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
60
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
62 {
63 gimple pattern_stmt;
64
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
69
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
71
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
78 stmt = pattern_stmt;
79 }
80
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
84
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
87 {
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
90 return;
91 }
92
93 VEC_safe_push (gimple, heap, *worklist, stmt);
94 }
95
96
97 /* Function vect_stmt_relevant_p.
98
99 Return true if STMT, in the loop represented by LOOP_VINFO, is
100 "relevant for vectorization".
101
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
105 - it is a control stmt in the loop (except for the exit condition).
106
107 CHECKME: what other side effects would the vectorizer allow? */
108
109 static bool
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
112 {
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
114 ssa_op_iter op_iter;
115 imm_use_iterator imm_iter;
116 use_operand_p use_p;
117 def_operand_p def_p;
118
119 *relevant = vect_unused_in_scope;
120 *live_p = false;
121
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
125 != loop_exit_ctrl_vec_info_type)
126 *relevant = vect_used_in_scope;
127
128 /* changing memory. */
129 if (gimple_code (stmt) != GIMPLE_PHI)
130 if (gimple_vdef (stmt))
131 {
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
134 *relevant = vect_used_in_scope;
135 }
136
137 /* uses outside the loop. */
138 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
139 {
140 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
141 {
142 basic_block bb = gimple_bb (USE_STMT (use_p));
143 if (!flow_bb_inside_loop_p (loop, bb))
144 {
145 if (vect_print_dump_info (REPORT_DETAILS))
146 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
147
148 if (is_gimple_debug (USE_STMT (use_p)))
149 continue;
150
151 /* We expect all such uses to be in the loop exit phis
152 (because of loop-closed SSA form). */
153 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
154 gcc_assert (bb == single_exit (loop)->dest);
155
156 *live_p = true;
157 }
158 }
159 }
160
161 return (*live_p || *relevant);
162 }
163
164
165 /* Function exist_non_indexing_operands_for_use_p
166
167 USE is one of the uses attached to STMT. Check if USE is
168 used in STMT for anything other than indexing an array. */
169
170 static bool
171 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
172 {
173 tree operand;
174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
175
176 /* USE corresponds to some operand in STMT. If there is no data
177 reference in STMT, then any operand that corresponds to USE
178 is not indexing an array. */
179 if (!STMT_VINFO_DATA_REF (stmt_info))
180 return true;
181
182 /* STMT has a data_ref. FORNOW this means that it's of one of
183 the following forms:
184 -1- ARRAY_REF = var
185 -2- var = ARRAY_REF
186 (This should have been verified in analyze_data_refs).
187
188 'var' in the second case corresponds to a def, not a use,
189 so USE cannot correspond to any operands that are not used
190 for array indexing.
191
192 Therefore, all we need to check is if STMT falls into the
193 first case, and whether var corresponds to USE. */
194
195 if (!gimple_assign_copy_p (stmt))
196 return false;
197 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
198 return false;
199 operand = gimple_assign_rhs1 (stmt);
200 if (TREE_CODE (operand) != SSA_NAME)
201 return false;
202
203 if (operand == use)
204 return true;
205
206 return false;
207 }
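/* Illustrative note (editor's sketch, not part of the original sources;
   the names i_1, x_2, x_3 are made up): for a store of the form
     a[i_1] = x_2       (case -1- above)
   the use of x_2 is a non-indexing use and the function returns true for
   it, while i_1 only feeds the address computation and yields false.
   For a load
     x_3 = a[i_1]       (case -2- above)
   the lhs is an SSA_NAME (a def, not a use), so the function returns
   false for every use of the stmt.  */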
208
209
210 /*
211 Function process_use.
212
213 Inputs:
214 - a USE in STMT in a loop represented by LOOP_VINFO
215 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
216 that defined USE. This is done by calling mark_relevant and passing it
217 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
218
219 Outputs:
220 Generally, LIVE_P and RELEVANT are used to define the liveness and
221 relevance info of the DEF_STMT of this USE:
222 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
223 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
224 Exceptions:
225 - case 1: If USE is used only for address computations (e.g. array indexing),
226 which does not need to be directly vectorized, then the liveness/relevance
227 of the respective DEF_STMT is left unchanged.
228 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
229 skip DEF_STMT because it has already been processed.
230 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
231 be modified accordingly.
232
233 Return true if everything is as expected. Return false otherwise. */
234
235 static bool
236 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
237 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
238 {
239 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
240 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
241 stmt_vec_info dstmt_vinfo;
242 basic_block bb, def_bb;
243 tree def;
244 gimple def_stmt;
245 enum vect_def_type dt;
246
247 /* case 1: we are only interested in uses that need to be vectorized. Uses
248 that are used for address computation are not considered relevant. */
249 if (!exist_non_indexing_operands_for_use_p (use, stmt))
250 return true;
251
252 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
253 {
254 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
255 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
256 return false;
257 }
258
259 if (!def_stmt || gimple_nop_p (def_stmt))
260 return true;
261
262 def_bb = gimple_bb (def_stmt);
263 if (!flow_bb_inside_loop_p (loop, def_bb))
264 {
265 if (vect_print_dump_info (REPORT_DETAILS))
266 fprintf (vect_dump, "def_stmt is out of loop.");
267 return true;
268 }
269
270 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
271 DEF_STMT must have already been processed, because this should be the
272 only way that STMT, which is a reduction-phi, was put in the worklist,
273 as there should be no other uses for DEF_STMT in the loop. So we just
274 check that everything is as expected, and we are done. */
275 dstmt_vinfo = vinfo_for_stmt (def_stmt);
276 bb = gimple_bb (stmt);
277 if (gimple_code (stmt) == GIMPLE_PHI
278 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
279 && gimple_code (def_stmt) != GIMPLE_PHI
280 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
281 && bb->loop_father == def_bb->loop_father)
282 {
283 if (vect_print_dump_info (REPORT_DETAILS))
284 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
285 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
286 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
287 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
288 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
289 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
290 return true;
291 }
292
293 /* case 3a: outer-loop stmt defining an inner-loop stmt:
294 outer-loop-header-bb:
295 d = def_stmt
296 inner-loop:
297 stmt # use (d)
298 outer-loop-tail-bb:
299 ... */
300 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
301 {
302 if (vect_print_dump_info (REPORT_DETAILS))
303 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
304
305 switch (relevant)
306 {
307 case vect_unused_in_scope:
308 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
309 vect_used_in_scope : vect_unused_in_scope;
310 break;
311
312 case vect_used_in_outer_by_reduction:
313 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
314 relevant = vect_used_by_reduction;
315 break;
316
317 case vect_used_in_outer:
318 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
319 relevant = vect_used_in_scope;
320 break;
321
322 case vect_used_in_scope:
323 break;
324
325 default:
326 gcc_unreachable ();
327 }
328 }
329
330 /* case 3b: inner-loop stmt defining an outer-loop stmt:
331 outer-loop-header-bb:
332 ...
333 inner-loop:
334 d = def_stmt
335 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
336 stmt # use (d) */
337 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
338 {
339 if (vect_print_dump_info (REPORT_DETAILS))
340 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
341
342 switch (relevant)
343 {
344 case vect_unused_in_scope:
345 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
346 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
347 vect_used_in_outer_by_reduction : vect_unused_in_scope;
348 break;
349
350 case vect_used_by_reduction:
351 relevant = vect_used_in_outer_by_reduction;
352 break;
353
354 case vect_used_in_scope:
355 relevant = vect_used_in_outer;
356 break;
357
358 default:
359 gcc_unreachable ();
360 }
361 }
362
363 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
364 return true;
365 }
366
367
368 /* Function vect_mark_stmts_to_be_vectorized.
369
370 Not all stmts in the loop need to be vectorized. For example:
371
372 for i...
373 for j...
374 1. T0 = i + j
375 2. T1 = a[T0]
376
377 3. j = j + 1
378
379 Stmts 1 and 3 do not need to be vectorized, because loop control and
380 addressing of vectorized data-refs are handled differently.
381
382 This pass detects such stmts. */
383
384 bool
385 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
386 {
387 VEC(gimple,heap) *worklist;
388 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
389 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
390 unsigned int nbbs = loop->num_nodes;
391 gimple_stmt_iterator si;
392 gimple stmt;
393 unsigned int i;
394 stmt_vec_info stmt_vinfo;
395 basic_block bb;
396 gimple phi;
397 bool live_p;
398 enum vect_relevant relevant, tmp_relevant;
399 enum vect_def_type def_type;
400
401 if (vect_print_dump_info (REPORT_DETAILS))
402 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
403
404 worklist = VEC_alloc (gimple, heap, 64);
405
406 /* 1. Init worklist. */
407 for (i = 0; i < nbbs; i++)
408 {
409 bb = bbs[i];
410 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
411 {
412 phi = gsi_stmt (si);
413 if (vect_print_dump_info (REPORT_DETAILS))
414 {
415 fprintf (vect_dump, "init: phi relevant? ");
416 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
417 }
418
419 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
420 vect_mark_relevant (&worklist, phi, relevant, live_p);
421 }
422 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
423 {
424 stmt = gsi_stmt (si);
425 if (vect_print_dump_info (REPORT_DETAILS))
426 {
427 fprintf (vect_dump, "init: stmt relevant? ");
428 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
429 }
430
431 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
432 vect_mark_relevant (&worklist, stmt, relevant, live_p);
433 }
434 }
435
436 /* 2. Process_worklist */
437 while (VEC_length (gimple, worklist) > 0)
438 {
439 use_operand_p use_p;
440 ssa_op_iter iter;
441
442 stmt = VEC_pop (gimple, worklist);
443 if (vect_print_dump_info (REPORT_DETAILS))
444 {
445 fprintf (vect_dump, "worklist: examine stmt: ");
446 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
447 }
448
449 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
450 (DEF_STMT) as relevant/irrelevant and live/dead according to the
451 liveness and relevance properties of STMT. */
452 stmt_vinfo = vinfo_for_stmt (stmt);
453 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
454 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
455
456 /* Generally, the liveness and relevance properties of STMT are
457 propagated as is to the DEF_STMTs of its USEs:
458 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
459 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
460
461 One exception is when STMT has been identified as defining a reduction
462 variable; in this case we set the liveness/relevance as follows:
463 live_p = false
464 relevant = vect_used_by_reduction
465 This is because we distinguish between two kinds of relevant stmts -
466 those that are used by a reduction computation, and those that are
467 (also) used by a regular computation. This allows us later on to
468 identify stmts that are used solely by a reduction, and therefore the
469 order of the results that they produce does not have to be kept. */
470
471 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
472 tmp_relevant = relevant;
473 switch (def_type)
474 {
475 case vect_reduction_def:
476 switch (tmp_relevant)
477 {
478 case vect_unused_in_scope:
479 relevant = vect_used_by_reduction;
480 break;
481
482 case vect_used_by_reduction:
483 if (gimple_code (stmt) == GIMPLE_PHI)
484 break;
485 /* fall through */
486
487 default:
488 if (vect_print_dump_info (REPORT_DETAILS))
489 fprintf (vect_dump, "unsupported use of reduction.");
490
491 VEC_free (gimple, heap, worklist);
492 return false;
493 }
494
495 live_p = false;
496 break;
497
498 case vect_nested_cycle:
499 if (tmp_relevant != vect_unused_in_scope
500 && tmp_relevant != vect_used_in_outer_by_reduction
501 && tmp_relevant != vect_used_in_outer)
502 {
503 if (vect_print_dump_info (REPORT_DETAILS))
504 fprintf (vect_dump, "unsupported use of nested cycle.");
505
506 VEC_free (gimple, heap, worklist);
507 return false;
508 }
509
510 live_p = false;
511 break;
512
513 case vect_double_reduction_def:
514 if (tmp_relevant != vect_unused_in_scope
515 && tmp_relevant != vect_used_by_reduction)
516 {
517 if (vect_print_dump_info (REPORT_DETAILS))
518 fprintf (vect_dump, "unsupported use of double reduction.");
519
520 VEC_free (gimple, heap, worklist);
521 return false;
522 }
523
524 live_p = false;
525 break;
526
527 default:
528 break;
529 }
530
531 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
532 {
533 tree op = USE_FROM_PTR (use_p);
534 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
535 {
536 VEC_free (gimple, heap, worklist);
537 return false;
538 }
539 }
540 } /* while worklist */
541
542 VEC_free (gimple, heap, worklist);
543 return true;
544 }
545
546
547 int
548 cost_for_stmt (gimple stmt)
549 {
550 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
551
552 switch (STMT_VINFO_TYPE (stmt_info))
553 {
554 case load_vec_info_type:
555 return TARG_SCALAR_LOAD_COST;
556 case store_vec_info_type:
557 return TARG_SCALAR_STORE_COST;
558 case op_vec_info_type:
559 case condition_vec_info_type:
560 case assignment_vec_info_type:
561 case reduc_vec_info_type:
562 case induc_vec_info_type:
563 case type_promotion_vec_info_type:
564 case type_demotion_vec_info_type:
565 case type_conversion_vec_info_type:
566 case call_vec_info_type:
567 return TARG_SCALAR_STMT_COST;
568 case undef_vec_info_type:
569 default:
570 gcc_unreachable ();
571 }
572 }
573
574 /* Function vect_model_simple_cost.
575
576 Models cost for simple operations, i.e. those that only emit ncopies of a
577 single op. Right now, this does not account for multiple insns that could
578 be generated for the single vector op. We will handle that shortly. */
579
580 void
581 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
582 enum vect_def_type *dt, slp_tree slp_node)
583 {
584 int i;
585 int inside_cost = 0, outside_cost = 0;
586
587 /* The SLP costs were already calculated during SLP tree build. */
588 if (PURE_SLP_STMT (stmt_info))
589 return;
590
591 inside_cost = ncopies * TARG_VEC_STMT_COST;
592
593 /* FORNOW: Assuming maximum 2 args per stmt. */
594 for (i = 0; i < 2; i++)
595 {
596 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
597 outside_cost += TARG_SCALAR_TO_VEC_COST;
598 }
599
600 if (vect_print_dump_info (REPORT_COST))
601 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
602 "outside_cost = %d .", inside_cost, outside_cost);
603
604 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
605 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
606 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
607 }
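/* Worked example (illustrative numbers only, following the model above):
   for a stmt with ncopies == 2 and one constant operand
   (dt[0] == vect_constant_def), the function computes
     inside_cost  = 2 * TARG_VEC_STMT_COST
     outside_cost = 1 * TARG_SCALAR_TO_VEC_COST
   i.e. the scalar-to-vector broadcast of the constant is charged once,
   outside the loop.  */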
608
609
610 /* Function vect_cost_strided_group_size
611
612 For strided load or store, return the group_size only if it is the first
613 load or store of a group, else return 1. This ensures that group size is
614 only returned once per group. */
615
616 static int
617 vect_cost_strided_group_size (stmt_vec_info stmt_info)
618 {
619 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
620
621 if (first_stmt == STMT_VINFO_STMT (stmt_info))
622 return DR_GROUP_SIZE (stmt_info);
623
624 return 1;
625 }
626
627
628 /* Function vect_model_store_cost
629
630 Models cost for stores. In the case of strided accesses, one access
631 has the overhead of the strided access attributed to it. */
632
633 void
634 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
635 enum vect_def_type dt, slp_tree slp_node)
636 {
637 int group_size;
638 int inside_cost = 0, outside_cost = 0;
639
640 /* The SLP costs were already calculated during SLP tree build. */
641 if (PURE_SLP_STMT (stmt_info))
642 return;
643
644 if (dt == vect_constant_def || dt == vect_external_def)
645 outside_cost = TARG_SCALAR_TO_VEC_COST;
646
647 /* Strided access? */
648 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
649 group_size = vect_cost_strided_group_size (stmt_info);
650 /* Not a strided access. */
651 else
652 group_size = 1;
653
654 /* Is this an access in a group of stores, which provide strided access?
655 If so, add in the cost of the permutes. */
656 if (group_size > 1)
657 {
658 /* Uses a high and low interleave operation for each needed permute. */
659 inside_cost = ncopies * exact_log2(group_size) * group_size
660 * TARG_VEC_STMT_COST;
661
662 if (vect_print_dump_info (REPORT_COST))
663 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
664 group_size);
665
666 }
667
668 /* Costs of the stores. */
669 inside_cost += ncopies * TARG_VEC_STORE_COST;
670
671 if (vect_print_dump_info (REPORT_COST))
672 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
673 "outside_cost = %d .", inside_cost, outside_cost);
674
675 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
676 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
677 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
678 }
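/* Worked example (illustrative numbers only, following the model above):
   the first store of an interleaved group with group_size == 4 and
   ncopies == 1 is modelled as
     inside_cost  = 1 * exact_log2 (4) * 4 * TARG_VEC_STMT_COST   (permutes)
                  + 1 * TARG_VEC_STORE_COST                       (the store)
   while outside_cost is TARG_SCALAR_TO_VEC_COST only when the stored
   value is a constant or loop invariant.  */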
679
680
681 /* Function vect_model_load_cost
682
683 Models cost for loads. In the case of strided accesses, one access
684 has the overhead of the strided access attributed to it. Since unaligned
685 accesses are supported for loads, we also account for the costs of the
686 access scheme chosen. */
687
688 void
689 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
690
691 {
692 int group_size;
693 int alignment_support_scheme;
694 gimple first_stmt;
695 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
696 int inside_cost = 0, outside_cost = 0;
697
698 /* The SLP costs were already calculated during SLP tree build. */
699 if (PURE_SLP_STMT (stmt_info))
700 return;
701
702 /* Strided accesses? */
703 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
704 if (first_stmt && !slp_node)
705 {
706 group_size = vect_cost_strided_group_size (stmt_info);
707 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
708 }
709 /* Not a strided access. */
710 else
711 {
712 group_size = 1;
713 first_dr = dr;
714 }
715
716 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
717
718 /* Is this an access in a group of loads providing strided access?
719 If so, add in the cost of the permutes. */
720 if (group_size > 1)
721 {
722 /* Uses even and odd extract operations for each needed permute. */
723 inside_cost = ncopies * exact_log2(group_size) * group_size
724 * TARG_VEC_STMT_COST;
725
726 if (vect_print_dump_info (REPORT_COST))
727 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
728 group_size);
729
730 }
731
732 /* The loads themselves. */
733 switch (alignment_support_scheme)
734 {
735 case dr_aligned:
736 {
737 inside_cost += ncopies * TARG_VEC_LOAD_COST;
738
739 if (vect_print_dump_info (REPORT_COST))
740 fprintf (vect_dump, "vect_model_load_cost: aligned.");
741
742 break;
743 }
744 case dr_unaligned_supported:
745 {
746 /* Here, we assign an additional cost for the unaligned load. */
747 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
748
749 if (vect_print_dump_info (REPORT_COST))
750 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
751 "hardware.");
752
753 break;
754 }
755 case dr_explicit_realign:
756 {
757 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
758
759 /* FIXME: If the misalignment remains fixed across the iterations of
760 the containing loop, the following cost should be added to the
761 outside costs. */
762 if (targetm.vectorize.builtin_mask_for_load)
763 inside_cost += TARG_VEC_STMT_COST;
764
765 break;
766 }
767 case dr_explicit_realign_optimized:
768 {
769 if (vect_print_dump_info (REPORT_COST))
770 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
771 "pipelined.");
772
773 /* Unaligned software pipeline has a load of an address, an initial
774 load, and possibly a mask operation to "prime" the loop. However,
775 if this is an access in a group of loads, which provide strided
776 access, then the above cost should only be considered for one
777 access in the group. Inside the loop, there is a load op
778 and a realignment op. */
779
780 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
781 {
782 outside_cost = 2*TARG_VEC_STMT_COST;
783 if (targetm.vectorize.builtin_mask_for_load)
784 outside_cost += TARG_VEC_STMT_COST;
785 }
786
787 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
788
789 break;
790 }
791
792 default:
793 gcc_unreachable ();
794 }
795
796 if (vect_print_dump_info (REPORT_COST))
797 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
798 "outside_cost = %d .", inside_cost, outside_cost);
799
800 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
801 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
802 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
803 }
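/* Worked example (illustrative numbers only, following the model above):
   a non-strided load with ncopies == 2 whose alignment scheme is
   dr_unaligned_supported is modelled as
     inside_cost  = 2 * TARG_VEC_UNALIGNED_LOAD_COST
     outside_cost = 0
   whereas dr_explicit_realign_optimized additionally charges the address
   computation, the initial load and the optional mask setup (2 or 3 times
   TARG_VEC_STMT_COST) to outside_cost for the software-pipelined case.  */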
804
805
806 /* Function vect_init_vector.
807
808 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
809 the vector elements of VECTOR_VAR. Place the initialization at GSI if it
810 is not NULL. Otherwise, place the initialization at the loop preheader.
811 Return the DEF of INIT_STMT.
812 It will be used in the vectorization of STMT. */
813
814 tree
815 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
816 gimple_stmt_iterator *gsi)
817 {
818 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
819 tree new_var;
820 gimple init_stmt;
821 tree vec_oprnd;
822 edge pe;
823 tree new_temp;
824 basic_block new_bb;
825
826 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
827 add_referenced_var (new_var);
828 init_stmt = gimple_build_assign (new_var, vector_var);
829 new_temp = make_ssa_name (new_var, init_stmt);
830 gimple_assign_set_lhs (init_stmt, new_temp);
831
832 if (gsi)
833 vect_finish_stmt_generation (stmt, init_stmt, gsi);
834 else
835 {
836 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
837
838 if (loop_vinfo)
839 {
840 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
841
842 if (nested_in_vect_loop_p (loop, stmt))
843 loop = loop->inner;
844
845 pe = loop_preheader_edge (loop);
846 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
847 gcc_assert (!new_bb);
848 }
849 else
850 {
851 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
852 basic_block bb;
853 gimple_stmt_iterator gsi_bb_start;
854
855 gcc_assert (bb_vinfo);
856 bb = BB_VINFO_BB (bb_vinfo);
857 gsi_bb_start = gsi_after_labels (bb);
858 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
859 }
860 }
861
862 if (vect_print_dump_info (REPORT_DETAILS))
863 {
864 fprintf (vect_dump, "created new init_stmt: ");
865 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
866 }
867
868 vec_oprnd = gimple_assign_lhs (init_stmt);
869 return vec_oprnd;
870 }
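/* Illustration (editor's sketch; the exact SSA name below is made up,
   only the "cst_" prefix comes from the code above): for a V4SI splat of
   a constant C, vect_init_vector emits roughly
     cst_.N_1 = { C, C, C, C };
   either right before STMT (when GSI is given), on the loop preheader
   edge, or after the labels of the basic block being SLP-vectorized.  */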
871
872
873 /* Function vect_get_vec_def_for_operand.
874
875 OP is an operand in STMT. This function returns a (vector) def that will be
876 used in the vectorized stmt for STMT.
877
878 In the case that OP is an SSA_NAME which is defined in the loop, then
879 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
880
881 In case OP is an invariant or constant, a new stmt that creates a vector def
882 needs to be introduced. */
883
884 tree
885 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
886 {
887 tree vec_oprnd;
888 gimple vec_stmt;
889 gimple def_stmt;
890 stmt_vec_info def_stmt_info = NULL;
891 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
892 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
893 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
894 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
895 tree vec_inv;
896 tree vec_cst;
897 tree t = NULL_TREE;
898 tree def;
899 int i;
900 enum vect_def_type dt;
901 bool is_simple_use;
902 tree vector_type;
903
904 if (vect_print_dump_info (REPORT_DETAILS))
905 {
906 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
907 print_generic_expr (vect_dump, op, TDF_SLIM);
908 }
909
910 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
911 &dt);
912 gcc_assert (is_simple_use);
913 if (vect_print_dump_info (REPORT_DETAILS))
914 {
915 if (def)
916 {
917 fprintf (vect_dump, "def = ");
918 print_generic_expr (vect_dump, def, TDF_SLIM);
919 }
920 if (def_stmt)
921 {
922 fprintf (vect_dump, " def_stmt = ");
923 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
924 }
925 }
926
927 switch (dt)
928 {
929 /* Case 1: operand is a constant. */
930 case vect_constant_def:
931 {
932 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
933 gcc_assert (vector_type);
934
935 if (scalar_def)
936 *scalar_def = op;
937
938 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
939 if (vect_print_dump_info (REPORT_DETAILS))
940 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
941
942 for (i = nunits - 1; i >= 0; --i)
943 {
944 t = tree_cons (NULL_TREE, op, t);
945 }
946 vec_cst = build_vector (vector_type, t);
947 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
948 }
949
950 /* Case 2: operand is defined outside the loop - loop invariant. */
951 case vect_external_def:
952 {
953 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
954 gcc_assert (vector_type);
955 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
956
957 if (scalar_def)
958 *scalar_def = def;
959
960 /* Create 'vec_inv = {inv,inv,..,inv}' */
961 if (vect_print_dump_info (REPORT_DETAILS))
962 fprintf (vect_dump, "Create vector_inv.");
963
964 for (i = nunits - 1; i >= 0; --i)
965 {
966 t = tree_cons (NULL_TREE, def, t);
967 }
968
969 /* FIXME: use build_constructor directly. */
970 vec_inv = build_constructor_from_list (vector_type, t);
971 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
972 }
973
974 /* Case 3: operand is defined inside the loop. */
975 case vect_internal_def:
976 {
977 if (scalar_def)
978 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
979
980 /* Get the def from the vectorized stmt. */
981 def_stmt_info = vinfo_for_stmt (def_stmt);
982 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
983 gcc_assert (vec_stmt);
984 if (gimple_code (vec_stmt) == GIMPLE_PHI)
985 vec_oprnd = PHI_RESULT (vec_stmt);
986 else if (is_gimple_call (vec_stmt))
987 vec_oprnd = gimple_call_lhs (vec_stmt);
988 else
989 vec_oprnd = gimple_assign_lhs (vec_stmt);
990 return vec_oprnd;
991 }
992
993 /* Case 4: operand is defined by a loop header phi - reduction */
994 case vect_reduction_def:
995 case vect_double_reduction_def:
996 case vect_nested_cycle:
997 {
998 struct loop *loop;
999
1000 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1001 loop = (gimple_bb (def_stmt))->loop_father;
1002
1003 /* Get the def before the loop */
1004 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1005 return get_initial_def_for_reduction (stmt, op, scalar_def);
1006 }
1007
1008 /* Case 5: operand is defined by loop-header phi - induction. */
1009 case vect_induction_def:
1010 {
1011 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1012
1013 /* Get the def from the vectorized stmt. */
1014 def_stmt_info = vinfo_for_stmt (def_stmt);
1015 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1016 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
1017 vec_oprnd = PHI_RESULT (vec_stmt);
1018 return vec_oprnd;
1019 }
1020
1021 default:
1022 gcc_unreachable ();
1023 }
1024 }
1025
1026
1027 /* Function vect_get_vec_def_for_stmt_copy
1028
1029 Return a vector-def for an operand. This function is used when the
1030 vectorized stmt to be created (by the caller to this function) is a "copy"
1031 created in case the vectorized result cannot fit in one vector, and several
1032 copies of the vector-stmt are required. In this case the vector-def is
1033 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1034 of the stmt that defines VEC_OPRND.
1035 DT is the type of the vector def VEC_OPRND.
1036
1037 Context:
1038 In case the vectorization factor (VF) is bigger than the number
1039 of elements that can fit in a vectype (nunits), we have to generate
1040 more than one vector stmt to vectorize the scalar stmt. This situation
1041 arises when there are multiple data-types operated upon in the loop; the
1042 smallest data-type determines the VF, and as a result, when vectorizing
1043 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1044 vector stmt (each computing a vector of 'nunits' results, and together
1045 computing 'VF' results in each iteration). This function is called when
1046 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1047 which VF=16 and nunits=4, so the number of copies required is 4):
1048
1049 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1050
1051 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1052 VS1.1: vx.1 = memref1 VS1.2
1053 VS1.2: vx.2 = memref2 VS1.3
1054 VS1.3: vx.3 = memref3
1055
1056 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1057 VSnew.1: vz1 = vx.1 + ... VSnew.2
1058 VSnew.2: vz2 = vx.2 + ... VSnew.3
1059 VSnew.3: vz3 = vx.3 + ...
1060
1061 The vectorization of S1 is explained in vectorizable_load.
1062 The vectorization of S2:
1063 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1064 the function 'vect_get_vec_def_for_operand' is called to
1065 get the relevant vector-def for each operand of S2. For operand x it
1066 returns the vector-def 'vx.0'.
1067
1068 To create the remaining copies of the vector-stmt (VSnew.j), this
1069 function is called to get the relevant vector-def for each operand. It is
1070 obtained from the respective VS1.j stmt, which is recorded in the
1071 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1072
1073 For example, to obtain the vector-def 'vx.1' in order to create the
1074 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1075 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1076 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1077 and return its def ('vx.1').
1078 Overall, to create the above sequence this function will be called 3 times:
1079 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1080 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1081 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1082
1083 tree
1084 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1085 {
1086 gimple vec_stmt_for_operand;
1087 stmt_vec_info def_stmt_info;
1088
1089 /* Do nothing; can reuse same def. */
1090 if (dt == vect_external_def || dt == vect_constant_def )
1091 return vec_oprnd;
1092
1093 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1094 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1095 gcc_assert (def_stmt_info);
1096 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1097 gcc_assert (vec_stmt_for_operand);
1098 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1099 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1100 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1101 else
1102 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1103 return vec_oprnd;
1104 }
1105
1106
1107 /* Get vectorized definitions for the operands to create a copy of an original
1108 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1109
1110 static void
1111 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1112 VEC(tree,heap) **vec_oprnds0,
1113 VEC(tree,heap) **vec_oprnds1)
1114 {
1115 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1116
1117 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1118 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1119
1120 if (vec_oprnds1 && *vec_oprnds1)
1121 {
1122 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1123 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1124 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1125 }
1126 }
1127
1128
1129 /* Get vectorized definitions for OP0 and OP1, or from SLP_NODE if it is not NULL. */
1130
1131 static void
1132 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1133 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1134 slp_tree slp_node)
1135 {
1136 if (slp_node)
1137 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1138 else
1139 {
1140 tree vec_oprnd;
1141
1142 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1143 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1144 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1145
1146 if (op1)
1147 {
1148 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1149 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1150 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1151 }
1152 }
1153 }
1154
1155
1156 /* Function vect_finish_stmt_generation.
1157
1158 Insert VEC_STMT before GSI and record a stmt_vec_info for it. */
1159
1160 void
1161 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1162 gimple_stmt_iterator *gsi)
1163 {
1164 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1166 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1167
1168 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1169
1170 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1171
1172 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1173 bb_vinfo));
1174
1175 if (vect_print_dump_info (REPORT_DETAILS))
1176 {
1177 fprintf (vect_dump, "add new stmt: ");
1178 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1179 }
1180
1181 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1182 }
1183
1184 /* Checks if CALL can be vectorized in types VECTYPE_OUT / VECTYPE_IN. Returns
1185 a function declaration if the target has a vectorized version
1186 of the function, or NULL_TREE if the function cannot be vectorized. */
1187
1188 tree
1189 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1190 {
1191 tree fndecl = gimple_call_fndecl (call);
1192 enum built_in_function code;
1193
1194 /* We only handle functions that do not read or clobber memory -- i.e.
1195 const or novops ones. */
1196 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1197 return NULL_TREE;
1198
1199 if (!fndecl
1200 || TREE_CODE (fndecl) != FUNCTION_DECL
1201 || !DECL_BUILT_IN (fndecl))
1202 return NULL_TREE;
1203
1204 code = DECL_FUNCTION_CODE (fndecl);
1205 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1206 vectype_in);
1207 }
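/* Illustration (target-dependent, shown only as an example): for a call
   to the sqrt builtin with double arguments, a target such as x86 may
   return a V2DF vector-sqrt builtin from the hook above; a NULL_TREE
   result simply means there is no vectorized counterpart on this target
   and the call will not be vectorized.  */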
1208
1209 /* Function vectorizable_call.
1210
1211 Check if STMT performs a function call that can be vectorized.
1212 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1213 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1214 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1215
1216 static bool
1217 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1218 {
1219 tree vec_dest;
1220 tree scalar_dest;
1221 tree op, type;
1222 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1223 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1224 tree vectype_out, vectype_in;
1225 int nunits_in;
1226 int nunits_out;
1227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1228 tree fndecl, new_temp, def, rhs_type, lhs_type;
1229 gimple def_stmt;
1230 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1231 gimple new_stmt = NULL;
1232 int ncopies, j;
1233 VEC(tree, heap) *vargs = NULL;
1234 enum { NARROW, NONE, WIDEN } modifier;
1235 size_t i, nargs;
1236
1237 /* FORNOW: unsupported in basic block SLP. */
1238 gcc_assert (loop_vinfo);
1239
1240 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1241 return false;
1242
1243 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1244 return false;
1245
1246 /* FORNOW: SLP not supported. */
1247 if (STMT_SLP_TYPE (stmt_info))
1248 return false;
1249
1250 /* Is STMT a vectorizable call? */
1251 if (!is_gimple_call (stmt))
1252 return false;
1253
1254 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1255 return false;
1256
1257 /* Process function arguments. */
1258 rhs_type = NULL_TREE;
1259 nargs = gimple_call_num_args (stmt);
1260
1261 /* Bail out if the function has more than two arguments; we do not
1262 have interesting builtin functions to vectorize with more than
1263 two arguments. Calls with no arguments are not supported either. */
1264 if (nargs == 0 || nargs > 2)
1265 return false;
1266
1267 for (i = 0; i < nargs; i++)
1268 {
1269 op = gimple_call_arg (stmt, i);
1270
1271 /* We can only handle calls with arguments of the same type. */
1272 if (rhs_type
1273 && rhs_type != TREE_TYPE (op))
1274 {
1275 if (vect_print_dump_info (REPORT_DETAILS))
1276 fprintf (vect_dump, "argument types differ.");
1277 return false;
1278 }
1279 rhs_type = TREE_TYPE (op);
1280
1281 if (!vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt[i]))
1282 {
1283 if (vect_print_dump_info (REPORT_DETAILS))
1284 fprintf (vect_dump, "use not simple.");
1285 return false;
1286 }
1287 }
1288
1289 vectype_in = get_vectype_for_scalar_type (rhs_type);
1290 if (!vectype_in)
1291 return false;
1292 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1293
1294 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1295 vectype_out = get_vectype_for_scalar_type (lhs_type);
1296 if (!vectype_out)
1297 return false;
1298 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1299
1300 /* FORNOW */
1301 if (nunits_in == nunits_out / 2)
1302 modifier = NARROW;
1303 else if (nunits_out == nunits_in)
1304 modifier = NONE;
1305 else if (nunits_out == nunits_in / 2)
1306 modifier = WIDEN;
1307 else
1308 return false;
1309
1310 /* For now, we only vectorize functions if a target specific builtin
1311 is available. TODO -- in some cases, it might be profitable to
1312 insert the calls for pieces of the vector, in order to be able
1313 to vectorize other operations in the loop. */
1314 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1315 if (fndecl == NULL_TREE)
1316 {
1317 if (vect_print_dump_info (REPORT_DETAILS))
1318 fprintf (vect_dump, "function is not vectorizable.");
1319
1320 return false;
1321 }
1322
1323 gcc_assert (!gimple_vuse (stmt));
1324
1325 if (modifier == NARROW)
1326 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1327 else
1328 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1329
1330 /* Sanity check: make sure that at least one copy of the vectorized stmt
1331 needs to be generated. */
1332 gcc_assert (ncopies >= 1);
1333
1334 if (!vec_stmt) /* transformation not required. */
1335 {
1336 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1337 if (vect_print_dump_info (REPORT_DETAILS))
1338 fprintf (vect_dump, "=== vectorizable_call ===");
1339 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1340 return true;
1341 }
1342
1343 /** Transform. **/
1344
1345 if (vect_print_dump_info (REPORT_DETAILS))
1346 fprintf (vect_dump, "transform operation.");
1347
1348 /* Handle def. */
1349 scalar_dest = gimple_call_lhs (stmt);
1350 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1351
1352 prev_stmt_info = NULL;
1353 switch (modifier)
1354 {
1355 case NONE:
1356 for (j = 0; j < ncopies; ++j)
1357 {
1358 /* Build argument list for the vectorized call. */
1359 if (j == 0)
1360 vargs = VEC_alloc (tree, heap, nargs);
1361 else
1362 VEC_truncate (tree, vargs, 0);
1363
1364 for (i = 0; i < nargs; i++)
1365 {
1366 op = gimple_call_arg (stmt, i);
1367 if (j == 0)
1368 vec_oprnd0
1369 = vect_get_vec_def_for_operand (op, stmt, NULL);
1370 else
1371 {
1372 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1373 vec_oprnd0
1374 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1375 }
1376
1377 VEC_quick_push (tree, vargs, vec_oprnd0);
1378 }
1379
1380 new_stmt = gimple_build_call_vec (fndecl, vargs);
1381 new_temp = make_ssa_name (vec_dest, new_stmt);
1382 gimple_call_set_lhs (new_stmt, new_temp);
1383
1384 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1385 mark_symbols_for_renaming (new_stmt);
1386
1387 if (j == 0)
1388 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1389 else
1390 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1391
1392 prev_stmt_info = vinfo_for_stmt (new_stmt);
1393 }
1394
1395 break;
1396
1397 case NARROW:
1398 for (j = 0; j < ncopies; ++j)
1399 {
1400 /* Build argument list for the vectorized call. */
1401 if (j == 0)
1402 vargs = VEC_alloc (tree, heap, nargs * 2);
1403 else
1404 VEC_truncate (tree, vargs, 0);
1405
1406 for (i = 0; i < nargs; i++)
1407 {
1408 op = gimple_call_arg (stmt, i);
1409 if (j == 0)
1410 {
1411 vec_oprnd0
1412 = vect_get_vec_def_for_operand (op, stmt, NULL);
1413 vec_oprnd1
1414 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1415 }
1416 else
1417 {
1418 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i);
1419 vec_oprnd0
1420 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1421 vec_oprnd1
1422 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1423 }
1424
1425 VEC_quick_push (tree, vargs, vec_oprnd0);
1426 VEC_quick_push (tree, vargs, vec_oprnd1);
1427 }
1428
1429 new_stmt = gimple_build_call_vec (fndecl, vargs);
1430 new_temp = make_ssa_name (vec_dest, new_stmt);
1431 gimple_call_set_lhs (new_stmt, new_temp);
1432
1433 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1434 mark_symbols_for_renaming (new_stmt);
1435
1436 if (j == 0)
1437 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1438 else
1439 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1440
1441 prev_stmt_info = vinfo_for_stmt (new_stmt);
1442 }
1443
1444 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1445
1446 break;
1447
1448 case WIDEN:
1449 /* No current target implements this case. */
1450 return false;
1451 }
1452
1453 VEC_free (tree, heap, vargs);
1454
1455 /* Update the exception handling table with the vector stmt if necessary. */
1456 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1457 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1458
1459 /* The call in STMT might prevent it from being removed in DCE.
1460 We however cannot remove it here, due to the way the SSA name
1461 it defines is mapped to the new definition. So just replace the
1462 rhs of the statement with something harmless. */
1463
1464 type = TREE_TYPE (scalar_dest);
1465 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1466 fold_convert (type, integer_zero_node));
1467 set_vinfo_for_stmt (new_stmt, stmt_info);
1468 set_vinfo_for_stmt (stmt, NULL);
1469 STMT_VINFO_STMT (stmt_info) = new_stmt;
1470 gsi_replace (gsi, new_stmt, false);
1471 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1472
1473 return true;
1474 }
1475
1476
1477 /* Function vect_gen_widened_results_half
1478
1479 Create a vector stmt whose code, type, number of arguments, and result
1480 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1481 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1482 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1483 needs to be created (DECL is a function-decl of a target-builtin).
1484 STMT is the original scalar stmt that we are vectorizing. */
1485
1486 static gimple
1487 vect_gen_widened_results_half (enum tree_code code,
1488 tree decl,
1489 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1490 tree vec_dest, gimple_stmt_iterator *gsi,
1491 gimple stmt)
1492 {
1493 gimple new_stmt;
1494 tree new_temp;
1495
1496 /* Generate half of the widened result: */
1497 if (code == CALL_EXPR)
1498 {
1499 /* Target specific support */
1500 if (op_type == binary_op)
1501 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1502 else
1503 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1504 new_temp = make_ssa_name (vec_dest, new_stmt);
1505 gimple_call_set_lhs (new_stmt, new_temp);
1506 }
1507 else
1508 {
1509 /* Generic support */
1510 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1511 if (op_type != binary_op)
1512 vec_oprnd1 = NULL;
1513 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1514 vec_oprnd1);
1515 new_temp = make_ssa_name (vec_dest, new_stmt);
1516 gimple_assign_set_lhs (new_stmt, new_temp);
1517 }
1518 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1519
1520 return new_stmt;
1521 }
1522
1523
1524 /* Check if STMT performs a conversion operation, that can be vectorized.
1525 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1526 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1527 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1528
1529 static bool
1530 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1531 gimple *vec_stmt, slp_tree slp_node)
1532 {
1533 tree vec_dest;
1534 tree scalar_dest;
1535 tree op0;
1536 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1537 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1538 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1539 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1540 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1541 tree new_temp;
1542 tree def;
1543 gimple def_stmt;
1544 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1545 gimple new_stmt = NULL;
1546 stmt_vec_info prev_stmt_info;
1547 int nunits_in;
1548 int nunits_out;
1549 tree vectype_out, vectype_in;
1550 int ncopies, j;
1551 tree rhs_type, lhs_type;
1552 tree builtin_decl;
1553 enum { NARROW, NONE, WIDEN } modifier;
1554 int i;
1555 VEC(tree,heap) *vec_oprnds0 = NULL;
1556 tree vop0;
1557 tree integral_type;
1558 VEC(tree,heap) *dummy = NULL;
1559 int dummy_int;
1560
1561 /* Is STMT a vectorizable conversion? */
1562
1563 /* FORNOW: unsupported in basic block SLP. */
1564 gcc_assert (loop_vinfo);
1565
1566 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1567 return false;
1568
1569 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1570 return false;
1571
1572 if (!is_gimple_assign (stmt))
1573 return false;
1574
1575 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1576 return false;
1577
1578 code = gimple_assign_rhs_code (stmt);
1579 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1580 return false;
1581
1582 /* Check types of lhs and rhs. */
1583 op0 = gimple_assign_rhs1 (stmt);
1584 rhs_type = TREE_TYPE (op0);
1585 vectype_in = get_vectype_for_scalar_type (rhs_type);
1586 if (!vectype_in)
1587 return false;
1588 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1589
1590 scalar_dest = gimple_assign_lhs (stmt);
1591 lhs_type = TREE_TYPE (scalar_dest);
1592 vectype_out = get_vectype_for_scalar_type (lhs_type);
1593 if (!vectype_out)
1594 return false;
1595 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1596
1597 /* FORNOW */
1598 if (nunits_in == nunits_out / 2)
1599 modifier = NARROW;
1600 else if (nunits_out == nunits_in)
1601 modifier = NONE;
1602 else if (nunits_out == nunits_in / 2)
1603 modifier = WIDEN;
1604 else
1605 return false;
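/* Illustration (editor's note, assuming V4SI and V2DF vector types): an
   int -> double conversion (FLOAT_EXPR) has nunits_in == 4 and
   nunits_out == 2, i.e. modifier == WIDEN (each input vector yields two
   output vectors), while the reverse double -> int direction
   (FIX_TRUNC_EXPR) is NARROW.  */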
1606
1607 if (modifier == NONE)
1608 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1609
1610 /* Bail out if the types are both integral or non-integral. */
1611 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1612 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
1613 return false;
1614
1615 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1616
1617 if (modifier == NARROW)
1618 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1619 else
1620 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1621
1622 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1623 this, so we can safely override NCOPIES with 1 here. */
1624 if (slp_node)
1625 ncopies = 1;
1626
1627 /* Sanity check: make sure that at least one copy of the vectorized stmt
1628 needs to be generated. */
1629 gcc_assert (ncopies >= 1);
1630
1631 /* Check the operands of the operation. */
1632 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
1633 {
1634 if (vect_print_dump_info (REPORT_DETAILS))
1635 fprintf (vect_dump, "use not simple.");
1636 return false;
1637 }
1638
1639 /* Supportable by target? */
1640 if ((modifier == NONE
1641 && !targetm.vectorize.builtin_conversion (code, integral_type))
1642 || (modifier == WIDEN
1643 && !supportable_widening_operation (code, stmt, vectype_in,
1644 &decl1, &decl2,
1645 &code1, &code2,
1646 &dummy_int, &dummy))
1647 || (modifier == NARROW
1648 && !supportable_narrowing_operation (code, stmt, vectype_in,
1649 &code1, &dummy_int, &dummy)))
1650 {
1651 if (vect_print_dump_info (REPORT_DETAILS))
1652 fprintf (vect_dump, "conversion not supported by target.");
1653 return false;
1654 }
1655
1656 if (modifier != NONE)
1657 {
1658 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1659 /* FORNOW: SLP not supported. */
1660 if (STMT_SLP_TYPE (stmt_info))
1661 return false;
1662 }
1663
1664 if (!vec_stmt) /* transformation not required. */
1665 {
1666 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1667 return true;
1668 }
1669
1670 /** Transform. **/
1671 if (vect_print_dump_info (REPORT_DETAILS))
1672 fprintf (vect_dump, "transform conversion.");
1673
1674 /* Handle def. */
1675 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1676
1677 if (modifier == NONE && !slp_node)
1678 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1679
1680 prev_stmt_info = NULL;
1681 switch (modifier)
1682 {
1683 case NONE:
1684 for (j = 0; j < ncopies; j++)
1685 {
1686 if (j == 0)
1687 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1688 else
1689 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1690
1691 builtin_decl =
1692 targetm.vectorize.builtin_conversion (code, integral_type);
1693 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1694 {
1695 /* Arguments are ready. Create the new vector stmt. */
1696 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1697 new_temp = make_ssa_name (vec_dest, new_stmt);
1698 gimple_call_set_lhs (new_stmt, new_temp);
1699 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1700 if (slp_node)
1701 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1702 }
1703
1704 if (j == 0)
1705 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1706 else
1707 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1708 prev_stmt_info = vinfo_for_stmt (new_stmt);
1709 }
1710 break;
1711
1712 case WIDEN:
1713 /* In case the vectorization factor (VF) is bigger than the number
1714 of elements that we can fit in a vectype (nunits), we have to
1715 generate more than one vector stmt - i.e - we need to "unroll"
1716 the vector stmt by a factor VF/nunits. */
1717 for (j = 0; j < ncopies; j++)
1718 {
1719 if (j == 0)
1720 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1721 else
1722 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1723
1724 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1725
1726 /* Generate first half of the widened result: */
1727 new_stmt
1728 = vect_gen_widened_results_half (code1, decl1,
1729 vec_oprnd0, vec_oprnd1,
1730 unary_op, vec_dest, gsi, stmt);
1731 if (j == 0)
1732 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1733 else
1734 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1735 prev_stmt_info = vinfo_for_stmt (new_stmt);
1736
1737 /* Generate second half of the widened result: */
1738 new_stmt
1739 = vect_gen_widened_results_half (code2, decl2,
1740 vec_oprnd0, vec_oprnd1,
1741 unary_op, vec_dest, gsi, stmt);
1742 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1743 prev_stmt_info = vinfo_for_stmt (new_stmt);
1744 }
1745 break;
1746
1747 case NARROW:
1748 /* In case the vectorization factor (VF) is bigger than the number
1749 of elements that we can fit in a vectype (nunits), we have to
1750 generate more than one vector stmt - i.e - we need to "unroll"
1751 the vector stmt by a factor VF/nunits. */
1752 for (j = 0; j < ncopies; j++)
1753 {
1754 /* Handle uses. */
1755 if (j == 0)
1756 {
1757 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1758 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1759 }
1760 else
1761 {
1762 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1763 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1764 }
1765
1766 /* Arguments are ready. Create the new vector stmt. */
1767 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1768 vec_oprnd1);
1769 new_temp = make_ssa_name (vec_dest, new_stmt);
1770 gimple_assign_set_lhs (new_stmt, new_temp);
1771 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1772
1773 if (j == 0)
1774 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1775 else
1776 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1777
1778 prev_stmt_info = vinfo_for_stmt (new_stmt);
1779 }
1780
1781 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1782 }
1783
1784 if (vec_oprnds0)
1785 VEC_free (tree, heap, vec_oprnds0);
1786
1787 return true;
1788 }

1789 /* Function vectorizable_assignment.
1790
1791 Check if STMT performs an assignment (copy) that can be vectorized.
1792 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1793 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1794 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
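/* For illustration (names are hypothetical): a stmt such as

     b_2 = a_1;

   qualifies, and is replaced by a single vector copy

     vect_b = vect_a;

   once the def of A_1 has been vectorized.  PAREN_EXPR wrappers are
   handled the same way.  */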
1795
1796 static bool
1797 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1798 gimple *vec_stmt, slp_tree slp_node)
1799 {
1800 tree vec_dest;
1801 tree scalar_dest;
1802 tree op;
1803 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1804 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1805 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1806 tree new_temp;
1807 tree def;
1808 gimple def_stmt;
1809 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1810 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1811 int ncopies;
1812 int i;
1813 VEC(tree,heap) *vec_oprnds = NULL;
1814 tree vop;
1815 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1816
1817 /* Multiple types in SLP are handled by creating the appropriate number of
1818 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1819 case of SLP. */
1820 if (slp_node)
1821 ncopies = 1;
1822 else
1823 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1824
1825 gcc_assert (ncopies >= 1);
1826 if (ncopies > 1)
1827 return false; /* FORNOW */
1828
1829 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1830 return false;
1831
1832 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1833 return false;
1834
1835 /* Is vectorizable assignment? */
1836 if (!is_gimple_assign (stmt))
1837 return false;
1838
1839 scalar_dest = gimple_assign_lhs (stmt);
1840 if (TREE_CODE (scalar_dest) != SSA_NAME)
1841 return false;
1842
1843 if (gimple_assign_single_p (stmt)
1844 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1845 op = gimple_assign_rhs1 (stmt);
1846 else
1847 return false;
1848
1849 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1850 {
1851 if (vect_print_dump_info (REPORT_DETAILS))
1852 fprintf (vect_dump, "use not simple.");
1853 return false;
1854 }
1855
1856 if (!vec_stmt) /* transformation not required. */
1857 {
1858 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1859 if (vect_print_dump_info (REPORT_DETAILS))
1860 fprintf (vect_dump, "=== vectorizable_assignment ===");
1861 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1862 return true;
1863 }
1864
1865 /** Transform. **/
1866 if (vect_print_dump_info (REPORT_DETAILS))
1867 fprintf (vect_dump, "transform assignment.");
1868
1869 /* Handle def. */
1870 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1871
1872 /* Handle use. */
1873 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1874
1875 /* Arguments are ready. Create the new vector stmt. */
1876 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1877 {
1878 *vec_stmt = gimple_build_assign (vec_dest, vop);
1879 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1880 gimple_assign_set_lhs (*vec_stmt, new_temp);
1881 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1882 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1883
1884 if (slp_node)
1885 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1886 }
1887
1888 VEC_free (tree, heap, vec_oprnds);
1889 return true;
1890 }
1891
1892 /* Function vectorizable_operation.
1893
1894 Check if STMT performs a binary or unary operation that can be vectorized.
1895 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1896 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1897 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1898
1899 static bool
1900 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1901 gimple *vec_stmt, slp_tree slp_node)
1902 {
1903 tree vec_dest;
1904 tree scalar_dest;
1905 tree op0, op1 = NULL;
1906 tree vec_oprnd1 = NULL_TREE;
1907 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1908 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1909 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1910 enum tree_code code;
1911 enum machine_mode vec_mode;
1912 tree new_temp;
1913 int op_type;
1914 optab optab;
1915 int icode;
1916 enum machine_mode optab_op2_mode;
1917 tree def;
1918 gimple def_stmt;
1919 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1920 gimple new_stmt = NULL;
1921 stmt_vec_info prev_stmt_info;
1922 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1923 int nunits_out;
1924 tree vectype_out;
1925 int ncopies;
1926 int j, i;
1927 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1928 tree vop0, vop1;
1929 unsigned int k;
1930 bool scalar_shift_arg = false;
1931 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1932 int vf;
1933
1934 if (loop_vinfo)
1935 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1936 else
1937 /* FORNOW: multiple types are not supported in basic block SLP. */
1938 vf = nunits_in;
1939
1940 /* Multiple types in SLP are handled by creating the appropriate number of
1941 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1942 case of SLP. */
1943 if (slp_node)
1944 ncopies = 1;
1945 else
1946 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1947
1948 gcc_assert (ncopies >= 1);
1949
1950 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1951 return false;
1952
1953 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1954 return false;
1955
1956 /* Is STMT a vectorizable binary/unary operation? */
1957 if (!is_gimple_assign (stmt))
1958 return false;
1959
1960 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1961 return false;
1962
1963 scalar_dest = gimple_assign_lhs (stmt);
1964 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1965 if (!vectype_out)
1966 return false;
1967 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1968 if (nunits_out != nunits_in)
1969 return false;
1970
1971 code = gimple_assign_rhs_code (stmt);
1972
1973 /* For pointer addition, we should use the normal plus for
1974 the vector addition. */
1975 if (code == POINTER_PLUS_EXPR)
1976 code = PLUS_EXPR;
1977
1978 /* Support only unary or binary operations. */
1979 op_type = TREE_CODE_LENGTH (code);
1980 if (op_type != unary_op && op_type != binary_op)
1981 {
1982 if (vect_print_dump_info (REPORT_DETAILS))
1983 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1984 return false;
1985 }
1986
1987 op0 = gimple_assign_rhs1 (stmt);
1988 if (!vect_is_simple_use (op0, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
1989 {
1990 if (vect_print_dump_info (REPORT_DETAILS))
1991 fprintf (vect_dump, "use not simple.");
1992 return false;
1993 }
1994
1995 if (op_type == binary_op)
1996 {
1997 op1 = gimple_assign_rhs2 (stmt);
1998 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
1999 &dt[1]))
2000 {
2001 if (vect_print_dump_info (REPORT_DETAILS))
2002 fprintf (vect_dump, "use not simple.");
2003 return false;
2004 }
2005 }
2006
2007 /* If this is a shift/rotate, determine whether the shift amount is a vector,
2008 or scalar. If the shift/rotate amount is a vector, use the vector/vector
2009 shift optabs. */
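/* For illustration (assumed source loops):

     a[i] = b[i] << c[i];    shift amount varies: use the vector/vector optab
     a[i] = b[i] << k;       invariant/constant K: prefer the vector/scalar
                             optab, falling back to vector/vector if the
                             target only provides that form.  */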
2010 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2011 || code == RROTATE_EXPR)
2012 {
2013 /* vector shifted by vector */
2014 if (dt[1] == vect_internal_def)
2015 {
2016 optab = optab_for_tree_code (code, vectype, optab_vector);
2017 if (vect_print_dump_info (REPORT_DETAILS))
2018 fprintf (vect_dump, "vector/vector shift/rotate found.");
2019 }
2020
2021 /* See if the machine has a vector shifted by scalar insn and, if not,
2022 then see if it has a vector shifted by vector insn. */
2023 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2024 {
2025 optab = optab_for_tree_code (code, vectype, optab_scalar);
2026 if (optab
2027 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2028 != CODE_FOR_nothing))
2029 {
2030 scalar_shift_arg = true;
2031 if (vect_print_dump_info (REPORT_DETAILS))
2032 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2033 }
2034 else
2035 {
2036 optab = optab_for_tree_code (code, vectype, optab_vector);
2037 if (optab
2038 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2039 != CODE_FOR_nothing))
2040 {
2041 if (vect_print_dump_info (REPORT_DETAILS))
2042 fprintf (vect_dump, "vector/vector shift/rotate found.");
2043
2044 /* Unlike the other binary operators, shifts/rotates take an
2045 int rhs rather than one of the same type as the lhs, so make
2046 sure the scalar constant is of the right type when we are
2047 dealing with vectors of short/char. */
2048 if (dt[1] == vect_constant_def)
2049 op1 = fold_convert (TREE_TYPE (vectype), op1);
2050 }
2051 }
2052 }
2053
2054 else
2055 {
2056 if (vect_print_dump_info (REPORT_DETAILS))
2057 fprintf (vect_dump, "operand mode requires invariant argument.");
2058 return false;
2059 }
2060 }
2061 else
2062 optab = optab_for_tree_code (code, vectype, optab_default);
2063
2064 /* Supportable by target? */
2065 if (!optab)
2066 {
2067 if (vect_print_dump_info (REPORT_DETAILS))
2068 fprintf (vect_dump, "no optab.");
2069 return false;
2070 }
2071 vec_mode = TYPE_MODE (vectype);
2072 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2073 if (icode == CODE_FOR_nothing)
2074 {
2075 if (vect_print_dump_info (REPORT_DETAILS))
2076 fprintf (vect_dump, "op not supported by target.");
2077 /* Check only during analysis. */
2078 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2079 || (vf < vect_min_worthwhile_factor (code)
2080 && !vec_stmt))
2081 return false;
2082 if (vect_print_dump_info (REPORT_DETAILS))
2083 fprintf (vect_dump, "proceeding using word mode.");
2084 }
2085
2086 /* Worthwhile without SIMD support? Check only during analysis. */
2087 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2088 && vf < vect_min_worthwhile_factor (code)
2089 && !vec_stmt)
2090 {
2091 if (vect_print_dump_info (REPORT_DETAILS))
2092 fprintf (vect_dump, "not worthwhile without SIMD support.");
2093 return false;
2094 }
2095
2096 if (!vec_stmt) /* transformation not required. */
2097 {
2098 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2099 if (vect_print_dump_info (REPORT_DETAILS))
2100 fprintf (vect_dump, "=== vectorizable_operation ===");
2101 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2102 return true;
2103 }
2104
2105 /** Transform. **/
2106
2107 if (vect_print_dump_info (REPORT_DETAILS))
2108 fprintf (vect_dump, "transform binary/unary operation.");
2109
2110 /* Handle def. */
2111 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2112
2113 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2114 created in the previous stages of the recursion, so no allocation is
2115 needed, except for the case of shift with scalar shift argument. In that
2116 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2117 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2118 In case of loop-based vectorization we allocate VECs of size 1. We
2119 allocate VEC_OPRNDS1 only in case of binary operation. */
2120 if (!slp_node)
2121 {
2122 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2123 if (op_type == binary_op)
2124 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2125 }
2126 else if (scalar_shift_arg)
2127 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2128
2129 /* In case the vectorization factor (VF) is bigger than the number
2130 of elements that we can fit in a vectype (nunits), we have to generate
2131 more than one vector stmt - i.e - we need to "unroll" the
2132 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2133 from one copy of the vector stmt to the next, in the field
2134 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2135 stages to find the correct vector defs to be used when vectorizing
2136 stmts that use the defs of the current stmt. The example below illustrates
2137 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2138 4 vectorized stmts):
2139
2140 before vectorization:
2141 RELATED_STMT VEC_STMT
2142 S1: x = memref - -
2143 S2: z = x + 1 - -
2144
2145 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2146 there):
2147 RELATED_STMT VEC_STMT
2148 VS1_0: vx0 = memref0 VS1_1 -
2149 VS1_1: vx1 = memref1 VS1_2 -
2150 VS1_2: vx2 = memref2 VS1_3 -
2151 VS1_3: vx3 = memref3 - -
2152 S1: x = load - VS1_0
2153 S2: z = x + 1 - -
2154
2155 step2: vectorize stmt S2 (done here):
2156 To vectorize stmt S2 we first need to find the relevant vector
2157 def for the first operand 'x'. This is, as usual, obtained from
2158 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2159 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2160 relevant vector def 'vx0'. Having found 'vx0' we can generate
2161 the vector stmt VS2_0, and as usual, record it in the
2162 STMT_VINFO_VEC_STMT of stmt S2.
2163 When creating the second copy (VS2_1), we obtain the relevant vector
2164 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2165 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2166 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2167 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2168 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2169 chain of stmts and pointers:
2170 RELATED_STMT VEC_STMT
2171 VS1_0: vx0 = memref0 VS1_1 -
2172 VS1_1: vx1 = memref1 VS1_2 -
2173 VS1_2: vx2 = memref2 VS1_3 -
2174 VS1_3: vx3 = memref3 - -
2175 S1: x = load - VS1_0
2176 VS2_0: vz0 = vx0 + v1 VS2_1 -
2177 VS2_1: vz1 = vx1 + v1 VS2_2 -
2178 VS2_2: vz2 = vx2 + v1 VS2_3 -
2179 VS2_3: vz3 = vx3 + v1 - -
2180 S2: z = x + 1 - VS2_0 */
2181
2182 prev_stmt_info = NULL;
2183 for (j = 0; j < ncopies; j++)
2184 {
2185 /* Handle uses. */
2186 if (j == 0)
2187 {
2188 if (op_type == binary_op && scalar_shift_arg)
2189 {
2190 /* Vector shl and shr insn patterns can be defined with scalar
2191 operand 2 (shift operand). In this case, use constant or loop
2192 invariant op1 directly, without extending it to vector mode
2193 first. */
2194 optab_op2_mode = insn_data[icode].operand[2].mode;
2195 if (!VECTOR_MODE_P (optab_op2_mode))
2196 {
2197 if (vect_print_dump_info (REPORT_DETAILS))
2198 fprintf (vect_dump, "operand 1 using scalar mode.");
2199 vec_oprnd1 = op1;
2200 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2201 if (slp_node)
2202 {
2203 /* Store vec_oprnd1 for every vector stmt to be created
2204 for SLP_NODE. We check during the analysis that all the
2205 shift arguments are the same.
2206 TODO: Allow different constants for different vector
2207 stmts generated for an SLP instance. */
2208 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2209 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2210 }
2211 }
2212 }
2213
2214 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2215 (a special case for certain kinds of vector shifts); otherwise,
2216 operand 1 should be of a vector type (the usual case). */
2217 if (op_type == binary_op && !vec_oprnd1)
2218 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2219 slp_node);
2220 else
2221 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2222 slp_node);
2223 }
2224 else
2225 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2226
2227 /* Arguments are ready. Create the new vector stmt. */
2228 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2229 {
2230 vop1 = ((op_type == binary_op)
2231 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2232 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2233 new_temp = make_ssa_name (vec_dest, new_stmt);
2234 gimple_assign_set_lhs (new_stmt, new_temp);
2235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2236 if (slp_node)
2237 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2238 }
2239
2240 if (slp_node)
2241 continue;
2242
2243 if (j == 0)
2244 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2245 else
2246 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2247 prev_stmt_info = vinfo_for_stmt (new_stmt);
2248 }
2249
2250 VEC_free (tree, heap, vec_oprnds0);
2251 if (vec_oprnds1)
2252 VEC_free (tree, heap, vec_oprnds1);
2253
2254 return true;
2255 }
2256
2257
2258 /* Get vectorized definitions for loop-based vectorization. For the first
2259 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2260 scalar operand), and for the rest we get a copy with
2261 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2262 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2263 The vectors are collected into VEC_OPRNDS. */
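/* For illustration (names are hypothetical): with MULTI_STEP_CVT == 1
   the defs collected for an operand X are

     VEC_OPRNDS = { vx0, vx1, vx2, vx3 }

   where vx0 is obtained from the scalar operand and every following
   entry is the stmt copy of the previous one (two defs are added per
   recursion level).  */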
2264
2265 static void
2266 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2267 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2268 {
2269 tree vec_oprnd;
2270
2271 /* Get the first vector operand. */
2272 /* All the vector operands except the very first one (which is the scalar
2273 operand) are stmt copies. */
2274 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2275 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2276 else
2277 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2278
2279 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2280
2281 /* Get second vector operand. */
2282 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2283 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2284
2285 *oprnd = vec_oprnd;
2286
2287 /* For conversion in multiple steps, continue to get operands
2288 recursively. */
2289 if (multi_step_cvt)
2290 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2291 }
2292
2293
2294 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2295 For multi-step conversions store the resulting vectors and call the function
2296 recursively. */
2297
2298 static void
2299 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2300 int multi_step_cvt, gimple stmt,
2301 VEC (tree, heap) *vec_dsts,
2302 gimple_stmt_iterator *gsi,
2303 slp_tree slp_node, enum tree_code code,
2304 stmt_vec_info *prev_stmt_info)
2305 {
2306 unsigned int i;
2307 tree vop0, vop1, new_tmp, vec_dest;
2308 gimple new_stmt;
2309 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2310
2311 vec_dest = VEC_pop (tree, vec_dsts);
2312
2313 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2314 {
2315 /* Create demotion operation. */
2316 vop0 = VEC_index (tree, *vec_oprnds, i);
2317 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2318 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2319 new_tmp = make_ssa_name (vec_dest, new_stmt);
2320 gimple_assign_set_lhs (new_stmt, new_tmp);
2321 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2322
2323 if (multi_step_cvt)
2324 /* Store the resulting vector for next recursive call. */
2325 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2326 else
2327 {
2328 /* This is the last step of the conversion sequence. Store the
2329 vectors in SLP_NODE or in vector info of the scalar statement
2330 (or in STMT_VINFO_RELATED_STMT chain). */
2331 if (slp_node)
2332 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2333 else
2334 {
2335 if (!*prev_stmt_info)
2336 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2337 else
2338 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2339
2340 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2341 }
2342 }
2343 }
2344
2345 /* For multi-step demotion operations we first generate demotion operations
2346 from the source type to the intermediate types, and then combine the
2347 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2348 type. */
2349 if (multi_step_cvt)
2350 {
2351 /* At each level of recursion we have half of the operands we had at the
2352 previous level. */
2353 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2354 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2355 stmt, vec_dsts, gsi, slp_node,
2356 code, prev_stmt_info);
2357 }
2358 }
2359
2360
2361 /* Function vectorizable_type_demotion
2362
2363 Check if STMT performs a binary or unary operation that involves
2364 type demotion, and if it can be vectorized.
2365 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2366 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2367 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2368
2369 static bool
2370 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2371 gimple *vec_stmt, slp_tree slp_node)
2372 {
2373 tree vec_dest;
2374 tree scalar_dest;
2375 tree op0;
2376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2377 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2378 enum tree_code code, code1 = ERROR_MARK;
2379 tree def;
2380 gimple def_stmt;
2381 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2382 stmt_vec_info prev_stmt_info;
2383 int nunits_in;
2384 int nunits_out;
2385 tree vectype_out;
2386 int ncopies;
2387 int j, i;
2388 tree vectype_in;
2389 int multi_step_cvt = 0;
2390 VEC (tree, heap) *vec_oprnds0 = NULL;
2391 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2392 tree last_oprnd, intermediate_type;
2393
2394 /* FORNOW: not supported by basic block SLP vectorization. */
2395 gcc_assert (loop_vinfo);
2396
2397 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2398 return false;
2399
2400 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2401 return false;
2402
2403 /* Is STMT a vectorizable type-demotion operation? */
2404 if (!is_gimple_assign (stmt))
2405 return false;
2406
2407 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2408 return false;
2409
2410 code = gimple_assign_rhs_code (stmt);
2411 if (!CONVERT_EXPR_CODE_P (code))
2412 return false;
2413
2414 op0 = gimple_assign_rhs1 (stmt);
2415 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2416 if (!vectype_in)
2417 return false;
2418 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2419
2420 scalar_dest = gimple_assign_lhs (stmt);
2421 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2422 if (!vectype_out)
2423 return false;
2424 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2425 if (nunits_in >= nunits_out)
2426 return false;
2427
2428 /* Multiple types in SLP are handled by creating the appropriate number of
2429 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2430 case of SLP. */
2431 if (slp_node)
2432 ncopies = 1;
2433 else
2434 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2435 gcc_assert (ncopies >= 1);
2436
2437 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2438 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2439 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2440 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2441 && CONVERT_EXPR_CODE_P (code))))
2442 return false;
2443
2444 /* Check the operands of the operation. */
2445 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2446 {
2447 if (vect_print_dump_info (REPORT_DETAILS))
2448 fprintf (vect_dump, "use not simple.");
2449 return false;
2450 }
2451
2452 /* Supportable by target? */
2453 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2454 &multi_step_cvt, &interm_types))
2455 return false;
2456
2457 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2458
2459 if (!vec_stmt) /* transformation not required. */
2460 {
2461 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2462 if (vect_print_dump_info (REPORT_DETAILS))
2463 fprintf (vect_dump, "=== vectorizable_demotion ===");
2464 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2465 return true;
2466 }
2467
2468 /** Transform. **/
2469 if (vect_print_dump_info (REPORT_DETAILS))
2470 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2471 ncopies);
2472
2473 /* In case of multi-step demotion, we first generate demotion operations to
2474 the intermediate types, and then from those types to the final one.
2475 We create vector destinations for the intermediate types (TYPES) received
2476 from supportable_narrowing_operation, and store them in the correct order
2477 for future use in vect_create_vectorized_demotion_stmts(). */
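/* For illustration (assumed types): demoting int to char with V4SI
   inputs is a two-step conversion,

     2 x V4SI -> V8HI        (demote to the intermediate type)
     2 x V8HI -> V16QI       (demote to the final type)

   so four input vectors feed each result vector, and a vector
   destination is created below for V8HI as well as for V16QI.  */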
2478 if (multi_step_cvt)
2479 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2480 else
2481 vec_dsts = VEC_alloc (tree, heap, 1);
2482
2483 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2484 VEC_quick_push (tree, vec_dsts, vec_dest);
2485
2486 if (multi_step_cvt)
2487 {
2488 for (i = VEC_length (tree, interm_types) - 1;
2489 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2490 {
2491 vec_dest = vect_create_destination_var (scalar_dest,
2492 intermediate_type);
2493 VEC_quick_push (tree, vec_dsts, vec_dest);
2494 }
2495 }
2496
2497 /* In case the vectorization factor (VF) is bigger than the number
2498 of elements that we can fit in a vectype (nunits), we have to generate
2499 more than one vector stmt - i.e - we need to "unroll" the
2500 vector stmt by a factor VF/nunits. */
2501 last_oprnd = op0;
2502 prev_stmt_info = NULL;
2503 for (j = 0; j < ncopies; j++)
2504 {
2505 /* Handle uses. */
2506 if (slp_node)
2507 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2508 else
2509 {
2510 VEC_free (tree, heap, vec_oprnds0);
2511 vec_oprnds0 = VEC_alloc (tree, heap,
2512 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2513 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2514 vect_pow2 (multi_step_cvt) - 1);
2515 }
2516
2517 /* Arguments are ready. Create the new vector stmts. */
2518 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2519 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2520 multi_step_cvt, stmt, tmp_vec_dsts,
2521 gsi, slp_node, code1,
2522 &prev_stmt_info);
2523 }
2524
2525 VEC_free (tree, heap, vec_oprnds0);
2526 VEC_free (tree, heap, vec_dsts);
2527 VEC_free (tree, heap, tmp_vec_dsts);
2528 VEC_free (tree, heap, interm_types);
2529
2530 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2531 return true;
2532 }
2533
2534
2535 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2536 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2537 the resulting vectors and call the function recursively. */
2538
2539 static void
2540 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2541 VEC (tree, heap) **vec_oprnds1,
2542 int multi_step_cvt, gimple stmt,
2543 VEC (tree, heap) *vec_dsts,
2544 gimple_stmt_iterator *gsi,
2545 slp_tree slp_node, enum tree_code code1,
2546 enum tree_code code2, tree decl1,
2547 tree decl2, int op_type,
2548 stmt_vec_info *prev_stmt_info)
2549 {
2550 int i;
2551 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2552 gimple new_stmt1, new_stmt2;
2553 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2554 VEC (tree, heap) *vec_tmp;
2555
2556 vec_dest = VEC_pop (tree, vec_dsts);
2557 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2558
2559 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2560 {
2561 if (op_type == binary_op)
2562 vop1 = VEC_index (tree, *vec_oprnds1, i);
2563 else
2564 vop1 = NULL_TREE;
2565
2566 /* Generate the two halves of promotion operation. */
2567 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2568 op_type, vec_dest, gsi, stmt);
2569 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2570 op_type, vec_dest, gsi, stmt);
2571 if (is_gimple_call (new_stmt1))
2572 {
2573 new_tmp1 = gimple_call_lhs (new_stmt1);
2574 new_tmp2 = gimple_call_lhs (new_stmt2);
2575 }
2576 else
2577 {
2578 new_tmp1 = gimple_assign_lhs (new_stmt1);
2579 new_tmp2 = gimple_assign_lhs (new_stmt2);
2580 }
2581
2582 if (multi_step_cvt)
2583 {
2584 /* Store the results for the recursive call. */
2585 VEC_quick_push (tree, vec_tmp, new_tmp1);
2586 VEC_quick_push (tree, vec_tmp, new_tmp2);
2587 }
2588 else
2589 {
2590 /* Last step of the promotion sequence - store the results. */
2591 if (slp_node)
2592 {
2593 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2594 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2595 }
2596 else
2597 {
2598 if (!*prev_stmt_info)
2599 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2600 else
2601 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2602
2603 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2604 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2605 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2606 }
2607 }
2608 }
2609
2610 if (multi_step_cvt)
2611 {
2612 /* For a multi-step promotion operation we call the function
2613 recursively for every stage. We start from the input type,
2614 create promotion operations to the intermediate types, and then
2615 create promotions to the output type. */
2616 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2617 VEC_free (tree, heap, vec_tmp);
2618 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2619 multi_step_cvt - 1, stmt,
2620 vec_dsts, gsi, slp_node, code1,
2621 code2, decl1, decl2, op_type,
2622 prev_stmt_info);
2623 }
2624 }
2625
2626
2627 /* Function vectorizable_type_promotion
2628
2629 Check if STMT performs a binary or unary operation that involves
2630 type promotion, and if it can be vectorized.
2631 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2632 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2633 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2634
2635 static bool
2636 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2637 gimple *vec_stmt, slp_tree slp_node)
2638 {
2639 tree vec_dest;
2640 tree scalar_dest;
2641 tree op0, op1 = NULL;
2642 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2643 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2644 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2645 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2646 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2647 int op_type;
2648 tree def;
2649 gimple def_stmt;
2650 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2651 stmt_vec_info prev_stmt_info;
2652 int nunits_in;
2653 int nunits_out;
2654 tree vectype_out;
2655 int ncopies;
2656 int j, i;
2657 tree vectype_in;
2658 tree intermediate_type = NULL_TREE;
2659 int multi_step_cvt = 0;
2660 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2661 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2662
2663 /* FORNOW: not supported by basic block SLP vectorization. */
2664 gcc_assert (loop_vinfo);
2665
2666 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2667 return false;
2668
2669 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2670 return false;
2671
2672 /* Is STMT a vectorizable type-promotion operation? */
2673 if (!is_gimple_assign (stmt))
2674 return false;
2675
2676 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2677 return false;
2678
2679 code = gimple_assign_rhs_code (stmt);
2680 if (!CONVERT_EXPR_CODE_P (code)
2681 && code != WIDEN_MULT_EXPR)
2682 return false;
2683
2684 op0 = gimple_assign_rhs1 (stmt);
2685 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2686 if (!vectype_in)
2687 return false;
2688 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2689
2690 scalar_dest = gimple_assign_lhs (stmt);
2691 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2692 if (!vectype_out)
2693 return false;
2694 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2695 if (nunits_in <= nunits_out)
2696 return false;
2697
2698 /* Multiple types in SLP are handled by creating the appropriate number of
2699 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2700 case of SLP. */
2701 if (slp_node)
2702 ncopies = 1;
2703 else
2704 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2705
2706 gcc_assert (ncopies >= 1);
2707
2708 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2709 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2710 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2711 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2712 && CONVERT_EXPR_CODE_P (code))))
2713 return false;
2714
2715 /* Check the operands of the operation. */
2716 if (!vect_is_simple_use (op0, loop_vinfo, NULL, &def_stmt, &def, &dt[0]))
2717 {
2718 if (vect_print_dump_info (REPORT_DETAILS))
2719 fprintf (vect_dump, "use not simple.");
2720 return false;
2721 }
2722
2723 op_type = TREE_CODE_LENGTH (code);
2724 if (op_type == binary_op)
2725 {
2726 op1 = gimple_assign_rhs2 (stmt);
2727 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
2728 {
2729 if (vect_print_dump_info (REPORT_DETAILS))
2730 fprintf (vect_dump, "use not simple.");
2731 return false;
2732 }
2733 }
2734
2735 /* Supportable by target? */
2736 if (!supportable_widening_operation (code, stmt, vectype_in,
2737 &decl1, &decl2, &code1, &code2,
2738 &multi_step_cvt, &interm_types))
2739 return false;
2740
2741 /* A binary widening operation can only be supported directly by the
2742 architecture. */
2743 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2744
2745 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2746
2747 if (!vec_stmt) /* transformation not required. */
2748 {
2749 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2750 if (vect_print_dump_info (REPORT_DETAILS))
2751 fprintf (vect_dump, "=== vectorizable_promotion ===");
2752 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2753 return true;
2754 }
2755
2756 /** Transform. **/
2757
2758 if (vect_print_dump_info (REPORT_DETAILS))
2759 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2760 ncopies);
2761
2762 /* Handle def. */
2763 /* In case of multi-step promotion, we first generate promotion operations
2764 to the intermediate types, and then from those types to the final one.
2765 We store the vector destinations in VEC_DSTS in the correct order for
2766 recursive creation of promotion operations in
2767 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2768 according to TYPES received from supportable_widening_operation(). */
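/* For illustration (assumed types): promoting char to int with V16QI
   inputs is a two-step conversion,

     V16QI -> 2 x V8HI       (unpack to the intermediate type)
     V8HI  -> 2 x V4SI       (unpack to the final type)

   so each input vector expands into four result vectors, and a vector
   destination is created below for V8HI as well as for V4SI.  */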
2769 if (multi_step_cvt)
2770 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2771 else
2772 vec_dsts = VEC_alloc (tree, heap, 1);
2773
2774 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2775 VEC_quick_push (tree, vec_dsts, vec_dest);
2776
2777 if (multi_step_cvt)
2778 {
2779 for (i = VEC_length (tree, interm_types) - 1;
2780 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2781 {
2782 vec_dest = vect_create_destination_var (scalar_dest,
2783 intermediate_type);
2784 VEC_quick_push (tree, vec_dsts, vec_dest);
2785 }
2786 }
2787
2788 if (!slp_node)
2789 {
2790 vec_oprnds0 = VEC_alloc (tree, heap,
2791 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2792 if (op_type == binary_op)
2793 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2794 }
2795
2796 /* In case the vectorization factor (VF) is bigger than the number
2797 of elements that we can fit in a vectype (nunits), we have to generate
2798 more than one vector stmt - i.e - we need to "unroll" the
2799 vector stmt by a factor VF/nunits. */
2800
2801 prev_stmt_info = NULL;
2802 for (j = 0; j < ncopies; j++)
2803 {
2804 /* Handle uses. */
2805 if (j == 0)
2806 {
2807 if (slp_node)
2808 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
2809 else
2810 {
2811 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2812 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2813 if (op_type == binary_op)
2814 {
2815 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2816 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2817 }
2818 }
2819 }
2820 else
2821 {
2822 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2823 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2824 if (op_type == binary_op)
2825 {
2826 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2827 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2828 }
2829 }
2830
2831 /* Arguments are ready. Create the new vector stmts. */
2832 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2833 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2834 multi_step_cvt, stmt,
2835 tmp_vec_dsts,
2836 gsi, slp_node, code1, code2,
2837 decl1, decl2, op_type,
2838 &prev_stmt_info);
2839 }
2840
2841 VEC_free (tree, heap, vec_dsts);
2842 VEC_free (tree, heap, tmp_vec_dsts);
2843 VEC_free (tree, heap, interm_types);
2844 VEC_free (tree, heap, vec_oprnds0);
2845 VEC_free (tree, heap, vec_oprnds1);
2846
2847 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2848 return true;
2849 }
2850
2851
2852 /* Function vectorizable_store.
2853
2854 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2855 can be vectorized.
2856 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2857 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2858 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2859
2860 static bool
2861 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2862 slp_tree slp_node)
2863 {
2864 tree scalar_dest;
2865 tree data_ref;
2866 tree op;
2867 tree vec_oprnd = NULL_TREE;
2868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2869 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2870 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2871 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2872 struct loop *loop = NULL;
2873 enum machine_mode vec_mode;
2874 tree dummy;
2875 enum dr_alignment_support alignment_support_scheme;
2876 tree def;
2877 gimple def_stmt;
2878 enum vect_def_type dt;
2879 stmt_vec_info prev_stmt_info = NULL;
2880 tree dataref_ptr = NULL_TREE;
2881 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2882 int ncopies;
2883 int j;
2884 gimple next_stmt, first_stmt = NULL;
2885 bool strided_store = false;
2886 unsigned int group_size, i;
2887 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2888 bool inv_p;
2889 VEC(tree,heap) *vec_oprnds = NULL;
2890 bool slp = (slp_node != NULL);
2891 unsigned int vec_num;
2892 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2893
2894 if (loop_vinfo)
2895 loop = LOOP_VINFO_LOOP (loop_vinfo);
2896
2897 /* Multiple types in SLP are handled by creating the appropriate number of
2898 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2899 case of SLP. */
2900 if (slp)
2901 ncopies = 1;
2902 else
2903 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2904
2905 gcc_assert (ncopies >= 1);
2906
2907 /* FORNOW. This restriction should be relaxed. */
2908 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2909 {
2910 if (vect_print_dump_info (REPORT_DETAILS))
2911 fprintf (vect_dump, "multiple types in nested loop.");
2912 return false;
2913 }
2914
2915 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2916 return false;
2917
2918 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2919 return false;
2920
2921 /* Is vectorizable store? */
2922
2923 if (!is_gimple_assign (stmt))
2924 return false;
2925
2926 scalar_dest = gimple_assign_lhs (stmt);
2927 if (TREE_CODE (scalar_dest) != ARRAY_REF
2928 && TREE_CODE (scalar_dest) != INDIRECT_REF
2929 && TREE_CODE (scalar_dest) != COMPONENT_REF
2930 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
2931 && TREE_CODE (scalar_dest) != REALPART_EXPR)
2932 return false;
2933
2934 gcc_assert (gimple_assign_single_p (stmt));
2935 op = gimple_assign_rhs1 (stmt);
2936 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
2937 {
2938 if (vect_print_dump_info (REPORT_DETAILS))
2939 fprintf (vect_dump, "use not simple.");
2940 return false;
2941 }
2942
2943 /* The scalar rhs type needs to be trivially convertible to the vector
2944 component type. This should always be the case. */
2945 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2946 {
2947 if (vect_print_dump_info (REPORT_DETAILS))
2948 fprintf (vect_dump, "??? operands of different types");
2949 return false;
2950 }
2951
2952 vec_mode = TYPE_MODE (vectype);
2953 /* FORNOW. In some cases can vectorize even if data-type not supported
2954 (e.g. - array initialization with 0). */
2955 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2956 return false;
2957
2958 if (!STMT_VINFO_DATA_REF (stmt_info))
2959 return false;
2960
2961 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2962 {
2963 strided_store = true;
2964 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2965 if (!vect_strided_store_supported (vectype)
2966 && !PURE_SLP_STMT (stmt_info) && !slp)
2967 return false;
2968
2969 if (first_stmt == stmt)
2970 {
2971 /* STMT is the leader of the group. Check the operands of all the
2972 stmts of the group. */
2973 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2974 while (next_stmt)
2975 {
2976 gcc_assert (gimple_assign_single_p (next_stmt));
2977 op = gimple_assign_rhs1 (next_stmt);
2978 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
2979 &def, &dt))
2980 {
2981 if (vect_print_dump_info (REPORT_DETAILS))
2982 fprintf (vect_dump, "use not simple.");
2983 return false;
2984 }
2985 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2986 }
2987 }
2988 }
2989
2990 if (!vec_stmt) /* transformation not required. */
2991 {
2992 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2993 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
2994 return true;
2995 }
2996
2997 /** Transform. **/
2998
2999 if (strided_store)
3000 {
3001 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3002 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3003
3004 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3005
3006 /* FORNOW */
3007 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3008
3009 /* We vectorize all the stmts of the interleaving group when we
3010 reach the last stmt in the group. */
3011 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3012 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
3013 && !slp)
3014 {
3015 *vec_stmt = NULL;
3016 return true;
3017 }
3018
3019 if (slp)
3020 strided_store = false;
3021
3022 /* VEC_NUM is the number of vect stmts to be created for this group. */
3023 if (slp)
3024 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3025 else
3026 vec_num = group_size;
3027 }
3028 else
3029 {
3030 first_stmt = stmt;
3031 first_dr = dr;
3032 group_size = vec_num = 1;
3033 }
3034
3035 if (vect_print_dump_info (REPORT_DETAILS))
3036 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3037
3038 dr_chain = VEC_alloc (tree, heap, group_size);
3039 oprnds = VEC_alloc (tree, heap, group_size);
3040
3041 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3042 gcc_assert (alignment_support_scheme);
3043
3044 /* In case the vectorization factor (VF) is bigger than the number
3045 of elements that we can fit in a vectype (nunits), we have to generate
3046 more than one vector stmt - i.e - we need to "unroll" the
3047 vector stmt by a factor VF/nunits. For more details see documentation in
3048 vect_get_vec_def_for_copy_stmt. */
3049
3050 /* In case of interleaving (non-unit strided access):
3051
3052 S1: &base + 2 = x2
3053 S2: &base = x0
3054 S3: &base + 1 = x1
3055 S4: &base + 3 = x3
3056
3057 We create vectorized stores starting from the base address (the access of
3058 the first stmt in the chain - S2 in the above example), when the last store
3059 stmt of the chain (S4) is reached:
3060
3061 VS1: &base = vx2
3062 VS2: &base + vec_size*1 = vx0
3063 VS3: &base + vec_size*2 = vx1
3064 VS4: &base + vec_size*3 = vx3
3065
3066 Then permutation statements are generated:
3067
3068 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3069 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3070 ...
3071
3072 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3073 (the order of the data-refs in the output of vect_permute_store_chain
3074 corresponds to the order of scalar stmts in the interleaving chain - see
3075 the documentation of vect_permute_store_chain()).
3076
3077 In case of both multiple types and interleaving, above vector stores and
3078 permutation stmts are created for every copy. The result vector stmts are
3079 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3080 STMT_VINFO_RELATED_STMT for the next copies.
3081 */
3082
3083 prev_stmt_info = NULL;
3084 for (j = 0; j < ncopies; j++)
3085 {
3086 gimple new_stmt;
3087 gimple ptr_incr;
3088
3089 if (j == 0)
3090 {
3091 if (slp)
3092 {
3093 /* Get vectorized arguments for SLP_NODE. */
3094 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3095
3096 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3097 }
3098 else
3099 {
3100 /* For interleaved stores we collect vectorized defs for all the
3101 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3102 used as an input to vect_permute_store_chain(), and OPRNDS as
3103 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3104
3105 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3106 OPRNDS are of size 1. */
3107 next_stmt = first_stmt;
3108 for (i = 0; i < group_size; i++)
3109 {
3110 /* Since gaps are not supported for interleaved stores,
3111 GROUP_SIZE is the exact number of stmts in the chain.
3112 Therefore, NEXT_STMT can't be NULL_TREE. In case
3113 there is no interleaving, GROUP_SIZE is 1, and only one
3114 iteration of the loop will be executed. */
3115 gcc_assert (next_stmt
3116 && gimple_assign_single_p (next_stmt));
3117 op = gimple_assign_rhs1 (next_stmt);
3118
3119 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3120 NULL);
3121 VEC_quick_push(tree, dr_chain, vec_oprnd);
3122 VEC_quick_push(tree, oprnds, vec_oprnd);
3123 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3124 }
3125 }
3126
3127 /* We should have caught mismatched types earlier. */
3128 gcc_assert (useless_type_conversion_p (vectype,
3129 TREE_TYPE (vec_oprnd)));
3130 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3131 &dummy, &ptr_incr, false,
3132 &inv_p);
3133 gcc_assert (bb_vinfo || !inv_p);
3134 }
3135 else
3136 {
3137 /* For interleaved stores we created vectorized defs for all the
3138 defs stored in OPRNDS in the previous iteration (previous copy).
3139 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3140 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3141 next copy.
3142 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3143 OPRNDS are of size 1. */
3144 for (i = 0; i < group_size; i++)
3145 {
3146 op = VEC_index (tree, oprnds, i);
3147 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3148 &dt);
3149 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3150 VEC_replace(tree, dr_chain, i, vec_oprnd);
3151 VEC_replace(tree, oprnds, i, vec_oprnd);
3152 }
3153 dataref_ptr =
3154 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3155 }
3156
3157 if (strided_store)
3158 {
3159 result_chain = VEC_alloc (tree, heap, group_size);
3160 /* Permute. */
3161 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3162 &result_chain))
3163 return false;
3164 }
3165
3166 next_stmt = first_stmt;
3167 for (i = 0; i < vec_num; i++)
3168 {
3169 if (i > 0)
3170 /* Bump the vector pointer. */
3171 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3172 NULL_TREE);
3173
3174 if (slp)
3175 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3176 else if (strided_store)
3177 /* For strided stores vectorized defs are interleaved in
3178 vect_permute_store_chain(). */
3179 vec_oprnd = VEC_index (tree, result_chain, i);
3180
3181 if (aligned_access_p (first_dr))
3182 data_ref = build_fold_indirect_ref (dataref_ptr);
3183 else
3184 {
3185 int mis = DR_MISALIGNMENT (first_dr);
3186 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3187 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3188 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3189 }
3190
3191 /* If accesses through a pointer to vectype do not alias the original
3192 memory reference we have a problem. This should never happen. */
3193 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3194 get_alias_set (gimple_assign_lhs (stmt))));
3195
3196 /* Arguments are ready. Create the new vector stmt. */
3197 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3198 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3199 mark_symbols_for_renaming (new_stmt);
3200
3201 if (slp)
3202 continue;
3203
3204 if (j == 0)
3205 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3206 else
3207 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3208
3209 prev_stmt_info = vinfo_for_stmt (new_stmt);
3210 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3211 if (!next_stmt)
3212 break;
3213 }
3214 }
3215
3216 VEC_free (tree, heap, dr_chain);
3217 VEC_free (tree, heap, oprnds);
3218 if (result_chain)
3219 VEC_free (tree, heap, result_chain);
3220
3221 return true;
3222 }
3223
3224 /* vectorizable_load.
3225
3226 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3227 can be vectorized.
3228 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3229 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3230 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3231
3232 static bool
3233 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3234 slp_tree slp_node, slp_instance slp_node_instance)
3235 {
3236 tree scalar_dest;
3237 tree vec_dest = NULL;
3238 tree data_ref = NULL;
3239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3240 stmt_vec_info prev_stmt_info;
3241 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3242 struct loop *loop = NULL;
3243 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3244 bool nested_in_vect_loop = false;
3245 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3246 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3247 tree new_temp;
3248 int mode;
3249 gimple new_stmt = NULL;
3250 tree dummy;
3251 enum dr_alignment_support alignment_support_scheme;
3252 tree dataref_ptr = NULL_TREE;
3253 gimple ptr_incr;
3254 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3255 int ncopies;
3256 int i, j, group_size;
3257 tree msq = NULL_TREE, lsq;
3258 tree offset = NULL_TREE;
3259 tree realignment_token = NULL_TREE;
3260 gimple phi = NULL;
3261 VEC(tree,heap) *dr_chain = NULL;
3262 bool strided_load = false;
3263 gimple first_stmt;
3264 tree scalar_type;
3265 bool inv_p;
3266 bool compute_in_loop = false;
3267 struct loop *at_loop;
3268 int vec_num;
3269 bool slp = (slp_node != NULL);
3270 bool slp_perm = false;
3271 enum tree_code code;
3272 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3273 int vf;
3274
3275 if (loop_vinfo)
3276 {
3277 loop = LOOP_VINFO_LOOP (loop_vinfo);
3278 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3279 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3280 }
3281 else
3282 /* FORNOW: multiple types are not supported in basic block SLP. */
3283 vf = nunits;
3284
3285 /* Multiple types in SLP are handled by creating the appropriate number of
3286 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3287 case of SLP. */
3288 if (slp)
3289 ncopies = 1;
3290 else
3291 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3292
3293 gcc_assert (ncopies >= 1);
3294
3295 /* FORNOW. This restriction should be relaxed. */
3296 if (nested_in_vect_loop && ncopies > 1)
3297 {
3298 if (vect_print_dump_info (REPORT_DETAILS))
3299 fprintf (vect_dump, "multiple types in nested loop.");
3300 return false;
3301 }
3302
3303 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3304 return false;
3305
3306 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3307 return false;
3308
3309 /* Is vectorizable load? */
3310 if (!is_gimple_assign (stmt))
3311 return false;
3312
3313 scalar_dest = gimple_assign_lhs (stmt);
3314 if (TREE_CODE (scalar_dest) != SSA_NAME)
3315 return false;
3316
3317 code = gimple_assign_rhs_code (stmt);
3318 if (code != ARRAY_REF
3319 && code != INDIRECT_REF
3320 && code != COMPONENT_REF
3321 && code != IMAGPART_EXPR
3322 && code != REALPART_EXPR)
3323 return false;
3324
3325 if (!STMT_VINFO_DATA_REF (stmt_info))
3326 return false;
3327
3328 scalar_type = TREE_TYPE (DR_REF (dr));
3329 mode = (int) TYPE_MODE (vectype);
3330
3331 /* FORNOW. In some cases can vectorize even if data-type not supported
3332 (e.g. - data copies). */
3333 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3334 {
3335 if (vect_print_dump_info (REPORT_DETAILS))
3336 fprintf (vect_dump, "Aligned load, but unsupported type.");
3337 return false;
3338 }
3339
3340 /* The vector component type needs to be trivially convertible to the
3341 scalar lhs. This should always be the case. */
3342 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3343 {
3344 if (vect_print_dump_info (REPORT_DETAILS))
3345 fprintf (vect_dump, "??? operands of different types");
3346 return false;
3347 }
3348
3349 /* Check if the load is a part of an interleaving chain. */
3350 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3351 {
3352 strided_load = true;
3353 /* FORNOW */
3354 gcc_assert (! nested_in_vect_loop);
3355
3356 /* Check if interleaving is supported. */
3357 if (!vect_strided_load_supported (vectype)
3358 && !PURE_SLP_STMT (stmt_info) && !slp)
3359 return false;
3360 }
3361
3362 if (!vec_stmt) /* transformation not required. */
3363 {
3364 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3365 vect_model_load_cost (stmt_info, ncopies, NULL);
3366 return true;
3367 }
3368
3369 if (vect_print_dump_info (REPORT_DETAILS))
3370 fprintf (vect_dump, "transform load.");
3371
3372 /** Transform. **/
3373
3374 if (strided_load)
3375 {
3376 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3377 /* Check if the chain of loads is already vectorized. */
3378 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3379 {
3380 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3381 return true;
3382 }
3383 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3384 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3385
3386 /* VEC_NUM is the number of vect stmts to be created for this group. */
3387 if (slp)
3388 {
3389 strided_load = false;
3390 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3391 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3392 slp_perm = true;
3393 }
3394 else
3395 vec_num = group_size;
3396
3397 dr_chain = VEC_alloc (tree, heap, vec_num);
3398 }
3399 else
3400 {
3401 first_stmt = stmt;
3402 first_dr = dr;
3403 group_size = vec_num = 1;
3404 }
3405
3406 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3407 gcc_assert (alignment_support_scheme);
3408
3409 /* In case the vectorization factor (VF) is bigger than the number
3410 of elements that we can fit in a vectype (nunits), we have to generate
3411 more than one vector stmt - i.e - we need to "unroll" the
3412 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3413 from one copy of the vector stmt to the next, in the field
3414 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3415 stages to find the correct vector defs to be used when vectorizing
3416 stmts that use the defs of the current stmt. The example below illustrates
3417 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3418 4 vectorized stmts):
3419
3420 before vectorization:
3421 RELATED_STMT VEC_STMT
3422 S1: x = memref - -
3423 S2: z = x + 1 - -
3424
3425 step 1: vectorize stmt S1:
3426 We first create the vector stmt VS1_0, and, as usual, record a
3427 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3428 Next, we create the vector stmt VS1_1, and record a pointer to
3429 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3430 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3431 stmts and pointers:
3432 RELATED_STMT VEC_STMT
3433 VS1_0: vx0 = memref0 VS1_1 -
3434 VS1_1: vx1 = memref1 VS1_2 -
3435 VS1_2: vx2 = memref2 VS1_3 -
3436 VS1_3: vx3 = memref3 - -
3437 S1: x = load - VS1_0
3438 S2: z = x + 1 - -
3439
3440 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3441 information we recorded in RELATED_STMT field is used to vectorize
3442 stmt S2. */
3443
3444 /* In case of interleaving (non-unit strided access):
3445
3446 S1: x2 = &base + 2
3447 S2: x0 = &base
3448 S3: x1 = &base + 1
3449 S4: x3 = &base + 3
3450
3451 Vectorized loads are created in the order of memory accesses
3452 starting from the access of the first stmt of the chain:
3453
3454 VS1: vx0 = &base
3455 VS2: vx1 = &base + vec_size*1
3456 VS3: vx2 = &base + vec_size*2
3457 VS4: vx3 = &base + vec_size*3
3458
3459 Then permutation statements are generated:
3460
3461 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3462 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3463 ...
3464
3465 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3466 (the order of the data-refs in the output of vect_permute_load_chain
3467 corresponds to the order of scalar stmts in the interleaving chain - see
3468 the documentation of vect_permute_load_chain()).
3469 The generation of permutation stmts and recording them in
3470 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3471
3472 In case of both multiple types and interleaving, the vector loads and
3473 permutation stmts above are created for every copy. The result vector stmts
3474 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3475 STMT_VINFO_RELATED_STMT for the next copies. */
3476
3477 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3478 on a target that supports unaligned accesses (dr_unaligned_supported),
3479 we generate the following code:
3480 p = initial_addr;
3481 indx = 0;
3482 loop {
3483 p = p + indx * vectype_size;
3484 vec_dest = *(p);
3485 indx = indx + 1;
3486 }
3487
3488 Otherwise, if the data reference is potentially unaligned on a target that
3489 does not support unaligned accesses (dr_explicit_realign_optimized), we
3490 generate the following code, in which the data in each iteration is
3491 obtained by two vector loads, one from the previous iteration, and one
3492 from the current iteration:
3493 p1 = initial_addr;
3494 msq_init = *(floor(p1))
3495 p2 = initial_addr + VS - 1;
3496 realignment_token = call target_builtin;
3497 indx = 0;
3498 loop {
3499 p2 = p2 + indx * vectype_size
3500 lsq = *(floor(p2))
3501 vec_dest = realign_load (msq, lsq, realignment_token)
3502 indx = indx + 1;
3503 msq = lsq;
3504 } */
3505
3506 /* If the misalignment remains the same throughout the execution of the
3507 loop, we can create the init_addr and permutation mask at the loop
3508 preheader. Otherwise, it needs to be created inside the loop.
3509 This can only occur when vectorizing memory accesses in the inner-loop
3510 nested within an outer-loop that is being vectorized. */
3511
3512 if (loop && nested_in_vect_loop_p (loop, stmt)
3513 && (TREE_INT_CST_LOW (DR_STEP (dr))
3514 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3515 {
3516 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3517 compute_in_loop = true;
3518 }
3519
3520 if ((alignment_support_scheme == dr_explicit_realign_optimized
3521 || alignment_support_scheme == dr_explicit_realign)
3522 && !compute_in_loop)
3523 {
3524 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3525 alignment_support_scheme, NULL_TREE,
3526 &at_loop);
3527 if (alignment_support_scheme == dr_explicit_realign_optimized)
3528 {
3529 phi = SSA_NAME_DEF_STMT (msq);
3530 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3531 }
3532 }
3533 else
3534 at_loop = loop;
3535
3536 prev_stmt_info = NULL;
3537 for (j = 0; j < ncopies; j++)
3538 {
3539 /* 1. Create the vector pointer update chain. */
3540 if (j == 0)
3541 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3542 at_loop, offset,
3543 &dummy, &ptr_incr, false,
3544 &inv_p);
3545 else
3546 dataref_ptr =
3547 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3548
3549 for (i = 0; i < vec_num; i++)
3550 {
3551 if (i > 0)
3552 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3553 NULL_TREE);
3554
3555 /* 2. Create the vector-load in the loop. */
3556 switch (alignment_support_scheme)
3557 {
3558 case dr_aligned:
3559 gcc_assert (aligned_access_p (first_dr));
3560 data_ref = build_fold_indirect_ref (dataref_ptr);
3561 break;
3562 case dr_unaligned_supported:
3563 {
3564 int mis = DR_MISALIGNMENT (first_dr);
3565 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3566
3567 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3568 data_ref =
3569 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3570 break;
3571 }
3572 case dr_explicit_realign:
3573 {
3574 tree ptr, bump;
3575 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3576
3577 if (compute_in_loop)
3578 msq = vect_setup_realignment (first_stmt, gsi,
3579 &realignment_token,
3580 dr_explicit_realign,
3581 dataref_ptr, NULL);
3582
3583 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3584 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3585 new_stmt = gimple_build_assign (vec_dest, data_ref);
3586 new_temp = make_ssa_name (vec_dest, new_stmt);
3587 gimple_assign_set_lhs (new_stmt, new_temp);
3588 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3589 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3590 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3591 msq = new_temp;
3592
3593 bump = size_binop (MULT_EXPR, vs_minus_1,
3594 TYPE_SIZE_UNIT (scalar_type));
3595 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3596 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3597 break;
3598 }
3599 case dr_explicit_realign_optimized:
3600 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3601 break;
3602 default:
3603 gcc_unreachable ();
3604 }
3605 /* If accesses through a pointer to vectype do not alias the original
3606 memory reference, we have a problem. This should never happen. */
3607 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3608 get_alias_set (gimple_assign_rhs1 (stmt))));
3609 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3610 new_stmt = gimple_build_assign (vec_dest, data_ref);
3611 new_temp = make_ssa_name (vec_dest, new_stmt);
3612 gimple_assign_set_lhs (new_stmt, new_temp);
3613 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3614 mark_symbols_for_renaming (new_stmt);
3615
3616 /* 3. Handle explicit realignment if necessary/supported. Create in
3617 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3618 if (alignment_support_scheme == dr_explicit_realign_optimized
3619 || alignment_support_scheme == dr_explicit_realign)
3620 {
3621 tree tmp;
3622
3623 lsq = gimple_assign_lhs (new_stmt);
3624 if (!realignment_token)
3625 realignment_token = dataref_ptr;
3626 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3627 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3628 realignment_token);
3629 new_stmt = gimple_build_assign (vec_dest, tmp);
3630 new_temp = make_ssa_name (vec_dest, new_stmt);
3631 gimple_assign_set_lhs (new_stmt, new_temp);
3632 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3633
3634 if (alignment_support_scheme == dr_explicit_realign_optimized)
3635 {
3636 gcc_assert (phi);
3637 if (i == vec_num - 1 && j == ncopies - 1)
3638 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
3639 UNKNOWN_LOCATION);
3640 msq = lsq;
3641 }
3642 }
3643
3644 /* 4. Handle invariant-load. */
3645 if (inv_p && !bb_vinfo)
3646 {
3647 gcc_assert (!strided_load);
3648 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3649 if (j == 0)
3650 {
3651 int k;
3652 tree t = NULL_TREE;
3653 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3654
3655 /* CHECKME: bitpos depends on endianness? */
3656 bitpos = bitsize_zero_node;
3657 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3658 bitsize, bitpos);
3659 vec_dest =
3660 vect_create_destination_var (scalar_dest, NULL_TREE);
3661 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3662 new_temp = make_ssa_name (vec_dest, new_stmt);
3663 gimple_assign_set_lhs (new_stmt, new_temp);
3664 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3665
3666 for (k = nunits - 1; k >= 0; --k)
3667 t = tree_cons (NULL_TREE, new_temp, t);
3668 /* FIXME: use build_constructor directly. */
3669 vec_inv = build_constructor_from_list (vectype, t);
3670 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3671 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3672 }
3673 else
3674 gcc_unreachable (); /* FORNOW. */
3675 }
3676
3677 /* Collect vector loads and later create their permutation in
3678 vect_transform_strided_load (). */
3679 if (strided_load || slp_perm)
3680 VEC_quick_push (tree, dr_chain, new_temp);
3681
3682 /* Store vector loads in the corresponding SLP_NODE. */
3683 if (slp && !slp_perm)
3684 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3685 }
3686
3687 if (slp && !slp_perm)
3688 continue;
3689
3690 if (slp_perm)
3691 {
3692 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
3693 slp_node_instance, false))
3694 {
3695 VEC_free (tree, heap, dr_chain);
3696 return false;
3697 }
3698 }
3699 else
3700 {
3701 if (strided_load)
3702 {
3703 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3704 return false;
3705
3706 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3707 VEC_free (tree, heap, dr_chain);
3708 dr_chain = VEC_alloc (tree, heap, group_size);
3709 }
3710 else
3711 {
3712 if (j == 0)
3713 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3714 else
3715 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3716 prev_stmt_info = vinfo_for_stmt (new_stmt);
3717 }
3718 }
3719 }
3720
3721 if (dr_chain)
3722 VEC_free (tree, heap, dr_chain);
3723
3724 return true;
3725 }
3726
3727 /* Function vect_is_simple_cond.
3728
3729 Input:
3730 LOOP - the loop that is being vectorized.
3731 COND - Condition that is checked for simple use.
3732
3733 Returns whether COND can be vectorized. Checks whether the
3734 condition operands are supportable using vect_is_simple_use. */
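
/* For illustration only (the SSA names are made up): a condition such as
   "a_1 < b_2" or "x_3 != 0" is simple, provided each SSA_NAME operand has
   a definition that vect_is_simple_use accepts; an operand that is neither
   an SSA_NAME nor an integer/real/fixed-point constant makes the condition
   non-vectorizable.  */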
3735
3736 static bool
3737 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3738 {
3739 tree lhs, rhs;
3740 tree def;
3741 enum vect_def_type dt;
3742
3743 if (!COMPARISON_CLASS_P (cond))
3744 return false;
3745
3746 lhs = TREE_OPERAND (cond, 0);
3747 rhs = TREE_OPERAND (cond, 1);
3748
3749 if (TREE_CODE (lhs) == SSA_NAME)
3750 {
3751 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3752 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
3753 &dt))
3754 return false;
3755 }
3756 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3757 && TREE_CODE (lhs) != FIXED_CST)
3758 return false;
3759
3760 if (TREE_CODE (rhs) == SSA_NAME)
3761 {
3762 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3763 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
3764 &dt))
3765 return false;
3766 }
3767 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3768 && TREE_CODE (rhs) != FIXED_CST)
3769 return false;
3770
3771 return true;
3772 }
3773
3774 /* vectorizable_condition.
3775
3776 Check if STMT is a conditional modify expression that can be vectorized.
3777 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3778 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3779 at GSI.
3780
3781 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
3782 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
3783 the else clause if it is 2).
3784
3785 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
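
/* A schematic example of the transformation (all names are illustrative
   only):

     scalar:   x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by the single vector statement

     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   where va_2, vb_3, vc_4 and vd_5 are the vector defs obtained for the
   corresponding scalar operands (or REDUC_DEF for the clause selected by
   REDUC_INDEX when vectorizing a nested cycle).  */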
3786
3787 bool
3788 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3789 gimple *vec_stmt, tree reduc_def, int reduc_index)
3790 {
3791 tree scalar_dest = NULL_TREE;
3792 tree vec_dest = NULL_TREE;
3793 tree op = NULL_TREE;
3794 tree cond_expr, then_clause, else_clause;
3795 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3796 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3797 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3798 tree vec_compare, vec_cond_expr;
3799 tree new_temp;
3800 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3801 enum machine_mode vec_mode;
3802 tree def;
3803 enum vect_def_type dt;
3804 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3805 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3806 enum tree_code code;
3807
3808 /* FORNOW: unsupported in basic block SLP. */
3809 gcc_assert (loop_vinfo);
3810
3811 gcc_assert (ncopies >= 1);
3812 if (ncopies > 1)
3813 return false; /* FORNOW */
3814
3815 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3816 return false;
3817
3818 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3819 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
3820 && reduc_def))
3821 return false;
3822
3823 /* FORNOW: SLP not supported. */
3824 if (STMT_SLP_TYPE (stmt_info))
3825 return false;
3826
3827 /* FORNOW: not yet supported. */
3828 if (STMT_VINFO_LIVE_P (stmt_info))
3829 {
3830 if (vect_print_dump_info (REPORT_DETAILS))
3831 fprintf (vect_dump, "value used after loop.");
3832 return false;
3833 }
3834
3835 /* Is vectorizable conditional operation? */
3836 if (!is_gimple_assign (stmt))
3837 return false;
3838
3839 code = gimple_assign_rhs_code (stmt);
3840
3841 if (code != COND_EXPR)
3842 return false;
3843
3844 gcc_assert (gimple_assign_single_p (stmt));
3845 op = gimple_assign_rhs1 (stmt);
3846 cond_expr = TREE_OPERAND (op, 0);
3847 then_clause = TREE_OPERAND (op, 1);
3848 else_clause = TREE_OPERAND (op, 2);
3849
3850 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3851 return false;
3852
3853 /* We do not handle two different vector types for the condition
3854 and the values. */
3855 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3856 return false;
3857
3858 if (TREE_CODE (then_clause) == SSA_NAME)
3859 {
3860 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3861 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
3862 &then_def_stmt, &def, &dt))
3863 return false;
3864 }
3865 else if (TREE_CODE (then_clause) != INTEGER_CST
3866 && TREE_CODE (then_clause) != REAL_CST
3867 && TREE_CODE (then_clause) != FIXED_CST)
3868 return false;
3869
3870 if (TREE_CODE (else_clause) == SSA_NAME)
3871 {
3872 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3873 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
3874 &else_def_stmt, &def, &dt))
3875 return false;
3876 }
3877 else if (TREE_CODE (else_clause) != INTEGER_CST
3878 && TREE_CODE (else_clause) != REAL_CST
3879 && TREE_CODE (else_clause) != FIXED_CST)
3880 return false;
3881
3882
3883 vec_mode = TYPE_MODE (vectype);
3884
3885 if (!vec_stmt)
3886 {
3887 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3888 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
3889 }
3890
3891 /* Transform */
3892
3893 /* Handle def. */
3894 scalar_dest = gimple_assign_lhs (stmt);
3895 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3896
3897 /* Handle cond expr. */
3898 vec_cond_lhs =
3899 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3900 vec_cond_rhs =
3901 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3902 if (reduc_index == 1)
3903 vec_then_clause = reduc_def;
3904 else
3905 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3906 if (reduc_index == 2)
3907 vec_else_clause = reduc_def;
3908 else
3909 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3910
3911 /* Arguments are ready. Create the new vector stmt. */
3912 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3913 vec_cond_lhs, vec_cond_rhs);
3914 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3915 vec_compare, vec_then_clause, vec_else_clause);
3916
3917 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3918 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3919 gimple_assign_set_lhs (*vec_stmt, new_temp);
3920 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3921
3922 return true;
3923 }
3924
3925
3926 /* Make sure the statement is vectorizable. */
3927
3928 bool
3929 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
3930 {
3931 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3932 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3933 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3934 bool ok;
3935 HOST_WIDE_INT dummy;
3936 tree scalar_type, vectype;
3937
3938 if (vect_print_dump_info (REPORT_DETAILS))
3939 {
3940 fprintf (vect_dump, "==> examining statement: ");
3941 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3942 }
3943
3944 if (gimple_has_volatile_ops (stmt))
3945 {
3946 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3947 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
3948
3949 return false;
3950 }
3951
3952 /* Skip stmts that do not need to be vectorized. In loops this is expected
3953 to include:
3954 - the COND_EXPR which is the loop exit condition
3955 - any LABEL_EXPRs in the loop
3956 - computations that are used only for array indexing or loop control.
3957 In basic blocks we only analyze statements that are a part of some SLP
3958 instance; therefore, all the statements are relevant.
3959
3960 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3961 && !STMT_VINFO_LIVE_P (stmt_info))
3962 {
3963 if (vect_print_dump_info (REPORT_DETAILS))
3964 fprintf (vect_dump, "irrelevant.");
3965
3966 return true;
3967 }
3968
3969 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3970 {
3971 case vect_internal_def:
3972 break;
3973
3974 case vect_reduction_def:
3975 case vect_nested_cycle:
3976 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
3977 || relevance == vect_used_in_outer_by_reduction
3978 || relevance == vect_unused_in_scope));
3979 break;
3980
3981 case vect_induction_def:
3982 case vect_constant_def:
3983 case vect_external_def:
3984 case vect_unknown_def_type:
3985 default:
3986 gcc_unreachable ();
3987 }
3988
3989 if (bb_vinfo)
3990 {
3991 gcc_assert (PURE_SLP_STMT (stmt_info));
3992
3993 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
3994 if (vect_print_dump_info (REPORT_DETAILS))
3995 {
3996 fprintf (vect_dump, "get vectype for scalar type: ");
3997 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3998 }
3999
4000 vectype = get_vectype_for_scalar_type (scalar_type);
4001 if (!vectype)
4002 {
4003 if (vect_print_dump_info (REPORT_DETAILS))
4004 {
4005 fprintf (vect_dump, "not SLPed: unsupported data-type ");
4006 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4007 }
4008 return false;
4009 }
4010
4011 if (vect_print_dump_info (REPORT_DETAILS))
4012 {
4013 fprintf (vect_dump, "vectype: ");
4014 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4015 }
4016
4017 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4018 }
4019
4020 if (STMT_VINFO_RELEVANT_P (stmt_info))
4021 {
4022 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
4023 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4024 *need_to_vectorize = true;
4025 }
4026
4027 ok = true;
4028 if (!bb_vinfo
4029 && (STMT_VINFO_RELEVANT_P (stmt_info)
4030 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4031 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4032 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4033 || vectorizable_conversion (stmt, NULL, NULL, NULL)
4034 || vectorizable_operation (stmt, NULL, NULL, NULL)
4035 || vectorizable_assignment (stmt, NULL, NULL, NULL)
4036 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4037 || vectorizable_call (stmt, NULL, NULL)
4038 || vectorizable_store (stmt, NULL, NULL, NULL)
4039 || vectorizable_reduction (stmt, NULL, NULL)
4040 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4041 else
4042 {
4043 if (bb_vinfo)
4044 ok = (vectorizable_operation (stmt, NULL, NULL, node)
4045 || vectorizable_assignment (stmt, NULL, NULL, node)
4046 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4047 || vectorizable_store (stmt, NULL, NULL, node));
4048 }
4049
4050 if (!ok)
4051 {
4052 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4053 {
4054 fprintf (vect_dump, "not vectorized: relevant stmt not ");
4055 fprintf (vect_dump, "supported: ");
4056 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4057 }
4058
4059 return false;
4060 }
4061
4062 if (bb_vinfo)
4063 return true;
4064
4065 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
4066 need extra handling, except for vectorizable reductions. */
4067 if (STMT_VINFO_LIVE_P (stmt_info)
4068 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4069 ok = vectorizable_live_operation (stmt, NULL, NULL);
4070
4071 if (!ok)
4072 {
4073 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4074 {
4075 fprintf (vect_dump, "not vectorized: live stmt not ");
4076 fprintf (vect_dump, "supported: ");
4077 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4078 }
4079
4080 return false;
4081 }
4082
4083 if (!PURE_SLP_STMT (stmt_info))
4084 {
4085 /* Groups of strided accesses whose size is not a power of 2 are not
4086 vectorizable yet using loop-vectorization. Therefore, if this stmt
4087 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4088 loop-based vectorized), the loop cannot be vectorized. */
4089 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4090 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4091 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4092 {
4093 if (vect_print_dump_info (REPORT_DETAILS))
4094 {
4095 fprintf (vect_dump, "not vectorized: the size of group "
4096 "of strided accesses is not a power of 2");
4097 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4098 }
4099
4100 return false;
4101 }
4102 }
4103
4104 return true;
4105 }
4106
4107
4108 /* Function vect_transform_stmt.
4109
4110 Create a vectorized stmt to replace STMT, and insert it at GSI. */
4111
4112 bool
4113 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4114 bool *strided_store, slp_tree slp_node,
4115 slp_instance slp_node_instance)
4116 {
4117 bool is_store = false;
4118 gimple vec_stmt = NULL;
4119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4120 gimple orig_stmt_in_pattern;
4121 bool done;
4122
4123 switch (STMT_VINFO_TYPE (stmt_info))
4124 {
4125 case type_demotion_vec_info_type:
4126 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4127 gcc_assert (done);
4128 break;
4129
4130 case type_promotion_vec_info_type:
4131 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4132 gcc_assert (done);
4133 break;
4134
4135 case type_conversion_vec_info_type:
4136 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4137 gcc_assert (done);
4138 break;
4139
4140 case induc_vec_info_type:
4141 gcc_assert (!slp_node);
4142 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4143 gcc_assert (done);
4144 break;
4145
4146 case op_vec_info_type:
4147 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4148 gcc_assert (done);
4149 break;
4150
4151 case assignment_vec_info_type:
4152 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4153 gcc_assert (done);
4154 break;
4155
4156 case load_vec_info_type:
4157 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4158 slp_node_instance);
4159 gcc_assert (done);
4160 break;
4161
4162 case store_vec_info_type:
4163 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4164 gcc_assert (done);
4165 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4166 {
4167 /* In case of interleaving, the whole chain is vectorized when the
4168 last store in the chain is reached. Store stmts before the last
4169 one are skipped, and their vec_stmt_info shouldn't be freed
4170 meanwhile. */
4171 *strided_store = true;
4172 if (STMT_VINFO_VEC_STMT (stmt_info))
4173 is_store = true;
4174 }
4175 else
4176 is_store = true;
4177 break;
4178
4179 case condition_vec_info_type:
4180 gcc_assert (!slp_node);
4181 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
4182 gcc_assert (done);
4183 break;
4184
4185 case call_vec_info_type:
4186 gcc_assert (!slp_node);
4187 done = vectorizable_call (stmt, gsi, &vec_stmt);
4188 break;
4189
4190 case reduc_vec_info_type:
4191 gcc_assert (!slp_node);
4192 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4193 gcc_assert (done);
4194 break;
4195
4196 default:
4197 if (!STMT_VINFO_LIVE_P (stmt_info))
4198 {
4199 if (vect_print_dump_info (REPORT_DETAILS))
4200 fprintf (vect_dump, "stmt not supported.");
4201 gcc_unreachable ();
4202 }
4203 }
4204
4205 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4206 is being vectorized, but outside the immediately enclosing loop. */
4207 if (vec_stmt
4208 && STMT_VINFO_LOOP_VINFO (stmt_info)
4209 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4210 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4211 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4212 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4213 || STMT_VINFO_RELEVANT (stmt_info) ==
4214 vect_used_in_outer_by_reduction))
4215 {
4216 struct loop *innerloop = LOOP_VINFO_LOOP (
4217 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4218 imm_use_iterator imm_iter;
4219 use_operand_p use_p;
4220 tree scalar_dest;
4221 gimple exit_phi;
4222
4223 if (vect_print_dump_info (REPORT_DETAILS))
4224 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4225
4226 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4227 (to be used when vectorizing outer-loop stmts that use the DEF of
4228 STMT). */
4229 if (gimple_code (stmt) == GIMPLE_PHI)
4230 scalar_dest = PHI_RESULT (stmt);
4231 else
4232 scalar_dest = gimple_assign_lhs (stmt);
4233
4234 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4235 {
4236 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4237 {
4238 exit_phi = USE_STMT (use_p);
4239 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4240 }
4241 }
4242 }
4243
4244 /* Handle stmts whose DEF is used outside the loop-nest that is
4245 being vectorized. */
4246 if (STMT_VINFO_LIVE_P (stmt_info)
4247 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4248 {
4249 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4250 gcc_assert (done);
4251 }
4252
4253 if (vec_stmt)
4254 {
4255 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4256 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4257 if (orig_stmt_in_pattern)
4258 {
4259 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4260 /* STMT was inserted by the vectorizer to replace a computation idiom.
4261 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4262 computed this idiom. We need to record a pointer to VEC_STMT in
4263 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4264 documentation of vect_pattern_recog. */
4265 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4266 {
4267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4268 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4269 }
4270 }
4271 }
4272
4273 return is_store;
4274 }
4275
4276
4277 /* Remove a group of stores (for SLP or interleaving), free their
4278 stmt_vec_info. */
4279
4280 void
4281 vect_remove_stores (gimple first_stmt)
4282 {
4283 gimple next = first_stmt;
4284 gimple tmp;
4285 gimple_stmt_iterator next_si;
4286
4287 while (next)
4288 {
4289 /* Free the attached stmt_vec_info and remove the stmt. */
4290 next_si = gsi_for_stmt (next);
4291 gsi_remove (&next_si, true);
4292 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4293 free_stmt_vec_info (next);
4294 next = tmp;
4295 }
4296 }
4297
4298
4299 /* Function new_stmt_vec_info.
4300
4301 Create and initialize a new stmt_vec_info struct for STMT. */
4302
4303 stmt_vec_info
4304 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
4305 bb_vec_info bb_vinfo)
4306 {
4307 stmt_vec_info res;
4308 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4309
4310 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4311 STMT_VINFO_STMT (res) = stmt;
4312 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4313 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
4314 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
4315 STMT_VINFO_LIVE_P (res) = false;
4316 STMT_VINFO_VECTYPE (res) = NULL;
4317 STMT_VINFO_VEC_STMT (res) = NULL;
4318 STMT_VINFO_IN_PATTERN_P (res) = false;
4319 STMT_VINFO_RELATED_STMT (res) = NULL;
4320 STMT_VINFO_DATA_REF (res) = NULL;
4321
4322 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4323 STMT_VINFO_DR_OFFSET (res) = NULL;
4324 STMT_VINFO_DR_INIT (res) = NULL;
4325 STMT_VINFO_DR_STEP (res) = NULL;
4326 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
4327
4328 if (gimple_code (stmt) == GIMPLE_PHI
4329 && is_loop_header_bb_p (gimple_bb (stmt)))
4330 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4331 else
4332 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
4333
4334 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4335 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4336 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4337 STMT_SLP_TYPE (res) = loop_vect;
4338 DR_GROUP_FIRST_DR (res) = NULL;
4339 DR_GROUP_NEXT_DR (res) = NULL;
4340 DR_GROUP_SIZE (res) = 0;
4341 DR_GROUP_STORE_COUNT (res) = 0;
4342 DR_GROUP_GAP (res) = 0;
4343 DR_GROUP_SAME_DR_STMT (res) = NULL;
4344 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4345
4346 return res;
4347 }
4348
4349
4350 /* Create the vector used to store stmt_vec_info structs. */
4351
4352 void
4353 init_stmt_vec_info_vec (void)
4354 {
4355 gcc_assert (!stmt_vec_info_vec);
4356 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4357 }
4358
4359
4360 /* Free the vector used to store stmt_vec_info structs. */
4361
4362 void
4363 free_stmt_vec_info_vec (void)
4364 {
4365 gcc_assert (stmt_vec_info_vec);
4366 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4367 }
4368
4369
4370 /* Free stmt vectorization related info. */
4371
4372 void
4373 free_stmt_vec_info (gimple stmt)
4374 {
4375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4376
4377 if (!stmt_info)
4378 return;
4379
4380 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4381 set_vinfo_for_stmt (stmt, NULL);
4382 free (stmt_info);
4383 }
4384
4385
4386 /* Function get_vectype_for_scalar_type.
4387
4388 Returns the vector type corresponding to SCALAR_TYPE as supported
4389 by the target. */
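
/* For example (illustrative only - the SIMD width is target-specific), on
   a target whose UNITS_PER_SIMD_WORD is 16 bytes, a 4-byte "int"
   SCALAR_TYPE yields a vector type with 16/4 = 4 units, i.e. V4SI.  */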
4390
4391 tree
4392 get_vectype_for_scalar_type (tree scalar_type)
4393 {
4394 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4395 int nbytes = GET_MODE_SIZE (inner_mode);
4396 int nunits;
4397 tree vectype;
4398
4399 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4400 return NULL_TREE;
4401
4402 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4403 is expected. */
4404 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4405
4406 vectype = build_vector_type (scalar_type, nunits);
4407 if (vect_print_dump_info (REPORT_DETAILS))
4408 {
4409 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4410 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4411 }
4412
4413 if (!vectype)
4414 return NULL_TREE;
4415
4416 if (vect_print_dump_info (REPORT_DETAILS))
4417 {
4418 fprintf (vect_dump, "vectype: ");
4419 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4420 }
4421
4422 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4423 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4424 {
4425 if (vect_print_dump_info (REPORT_DETAILS))
4426 fprintf (vect_dump, "mode not supported by target.");
4427 return NULL_TREE;
4428 }
4429
4430 return vectype;
4431 }
4432
4433 /* Function vect_is_simple_use.
4434
4435 Input:
4436 LOOP_VINFO - the vect info of the loop that is being vectorized.
4437 BB_VINFO - the vect info of the basic block that is being vectorized.
4438 OPERAND - operand of a stmt in the loop or bb.
4439 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
4440
4441 Returns whether a stmt with OPERAND can be vectorized.
4442 For loops, supportable operands are constants, loop invariants, and operands
4443 that are defined by the current iteration of the loop. Unsupportable
4444 operands are those that are defined by a previous iteration of the loop (as
4445 is the case in reduction/induction computations).
4446 For basic blocks, supportable operands are constants and bb invariants.
4447 For now, operands defined outside the basic block are not supported. */
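
/* For illustration only (the SSA names are made up):

     t_0 = ...;                  <-- defined before the loop
     loop:
       a_1 = b_2 + t_0;          <-- b_2 is vect_internal_def,
                                     t_0 is vect_external_def
       c_3 = a_1 * 4;            <-- the constant 4 is vect_constant_def  */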
4448
4449 bool
4450 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
4451 bb_vec_info bb_vinfo, gimple *def_stmt,
4452 tree *def, enum vect_def_type *dt)
4453 {
4454 basic_block bb;
4455 stmt_vec_info stmt_vinfo;
4456 struct loop *loop = NULL;
4457
4458 if (loop_vinfo)
4459 loop = LOOP_VINFO_LOOP (loop_vinfo);
4460
4461 *def_stmt = NULL;
4462 *def = NULL_TREE;
4463
4464 if (vect_print_dump_info (REPORT_DETAILS))
4465 {
4466 fprintf (vect_dump, "vect_is_simple_use: operand ");
4467 print_generic_expr (vect_dump, operand, TDF_SLIM);
4468 }
4469
4470 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4471 {
4472 *dt = vect_constant_def;
4473 return true;
4474 }
4475
4476 if (is_gimple_min_invariant (operand))
4477 {
4478 *def = operand;
4479 *dt = vect_external_def;
4480 return true;
4481 }
4482
4483 if (TREE_CODE (operand) == PAREN_EXPR)
4484 {
4485 if (vect_print_dump_info (REPORT_DETAILS))
4486 fprintf (vect_dump, "non-associatable copy.");
4487 operand = TREE_OPERAND (operand, 0);
4488 }
4489
4490 if (TREE_CODE (operand) != SSA_NAME)
4491 {
4492 if (vect_print_dump_info (REPORT_DETAILS))
4493 fprintf (vect_dump, "not ssa-name.");
4494 return false;
4495 }
4496
4497 *def_stmt = SSA_NAME_DEF_STMT (operand);
4498 if (*def_stmt == NULL)
4499 {
4500 if (vect_print_dump_info (REPORT_DETAILS))
4501 fprintf (vect_dump, "no def_stmt.");
4502 return false;
4503 }
4504
4505 if (vect_print_dump_info (REPORT_DETAILS))
4506 {
4507 fprintf (vect_dump, "def_stmt: ");
4508 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4509 }
4510
4511 /* Empty stmt is expected only in case of a function argument.
4512 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
4513 if (gimple_nop_p (*def_stmt))
4514 {
4515 *def = operand;
4516 *dt = vect_external_def;
4517 return true;
4518 }
4519
4520 bb = gimple_bb (*def_stmt);
4521
4522 if ((loop && !flow_bb_inside_loop_p (loop, bb))
4523 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
4524 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
4525 *dt = vect_external_def;
4526 else
4527 {
4528 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4529 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4530 }
4531
4532 if (*dt == vect_unknown_def_type)
4533 {
4534 if (vect_print_dump_info (REPORT_DETAILS))
4535 fprintf (vect_dump, "Unsupported pattern.");
4536 return false;
4537 }
4538
4539 if (vect_print_dump_info (REPORT_DETAILS))
4540 fprintf (vect_dump, "type of def: %d.",*dt);
4541
4542 switch (gimple_code (*def_stmt))
4543 {
4544 case GIMPLE_PHI:
4545 *def = gimple_phi_result (*def_stmt);
4546 break;
4547
4548 case GIMPLE_ASSIGN:
4549 *def = gimple_assign_lhs (*def_stmt);
4550 break;
4551
4552 case GIMPLE_CALL:
4553 *def = gimple_call_lhs (*def_stmt);
4554 if (*def != NULL)
4555 break;
4556 /* FALLTHRU */
4557 default:
4558 if (vect_print_dump_info (REPORT_DETAILS))
4559 fprintf (vect_dump, "unsupported defining stmt: ");
4560 return false;
4561 }
4562
4563 return true;
4564 }
4565
4566
4567 /* Function supportable_widening_operation
4568
4569 Check whether an operation represented by the code CODE is a
4570 widening operation that is supported by the target platform in
4571 vector form (i.e., when operating on arguments of type VECTYPE).
4572
4573 Widening operations we currently support are NOP (CONVERT), FLOAT
4574 and WIDEN_MULT. This function checks if these operations are supported
4575 by the target platform either directly (via vector tree-codes), or via
4576 target builtins.
4577
4578 Output:
4579 - CODE1 and CODE2 are codes of vector operations to be used when
4580 vectorizing the operation, if available.
4581 - DECL1 and DECL2 are decls of target builtin functions to be used
4582 when vectorizing the operation, if available. In this case,
4583 CODE1 and CODE2 are CALL_EXPR.
4584 - MULTI_STEP_CVT determines the number of required intermediate steps in
4585 case of multi-step conversion (like char->short->int - in that case
4586 MULTI_STEP_CVT will be 1).
4587 - INTERM_TYPES contains the intermediate type required to perform the
4588 widening operation (short in the above example). */
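
/* A schematic multi-step example (assuming, for illustration only, a
   target that supports V16QI, V8HI and V4SI but has no single-step
   char->int unpacking):

     char -> int needs one intermediate type (short):

       step 1:  v8hi_lo = VEC_UNPACK_LO_EXPR <v16qi>
                v8hi_hi = VEC_UNPACK_HI_EXPR <v16qi>
       step 2:  v4si_0  = VEC_UNPACK_LO_EXPR <v8hi_lo>
                v4si_1  = VEC_UNPACK_HI_EXPR <v8hi_lo>
                ...

   in which case MULTI_STEP_CVT is 1 and INTERM_TYPES contains the single
   intermediate vector type (V8HI).  */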
4589
4590 bool
4591 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4592 tree *decl1, tree *decl2,
4593 enum tree_code *code1, enum tree_code *code2,
4594 int *multi_step_cvt,
4595 VEC (tree, heap) **interm_types)
4596 {
4597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4598 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4599 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4600 bool ordered_p;
4601 enum machine_mode vec_mode;
4602 enum insn_code icode1, icode2;
4603 optab optab1, optab2;
4604 tree type = gimple_expr_type (stmt);
4605 tree wide_vectype = get_vectype_for_scalar_type (type);
4606 enum tree_code c1, c2;
4607
4608 /* The result of a vectorized widening operation usually requires two vectors
4609 (because the widened results do not fit in one vector). The generated
4610 vector results would normally be expected to be generated in the same
4611 order as in the original scalar computation, i.e. if 8 results are
4612 generated in each vector iteration, they are to be organized as follows:
4613 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4614
4615 However, in the special case that the result of the widening operation is
4616 used in a reduction computation only, the order doesn't matter (because
4617 when vectorizing a reduction we change the order of the computation).
4618 Some targets can take advantage of this and generate more efficient code.
4619 For example, targets like Altivec, that support widen_mult using a sequence
4620 of {mult_even,mult_odd} generate the following vectors:
4621 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4622
4623 When vectorizing outer-loops, we execute the inner-loop sequentially
4624 (each vectorized inner-loop iteration contributes to VF outer-loop
4625 iterations in parallel). We therefore don't allow changing the order
4626 of the computation in the inner-loop during outer-loop vectorization. */
4627
4628 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4629 && !nested_in_vect_loop_p (vect_loop, stmt))
4630 ordered_p = false;
4631 else
4632 ordered_p = true;
4633
4634 if (!ordered_p
4635 && code == WIDEN_MULT_EXPR
4636 && targetm.vectorize.builtin_mul_widen_even
4637 && targetm.vectorize.builtin_mul_widen_even (vectype)
4638 && targetm.vectorize.builtin_mul_widen_odd
4639 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4640 {
4641 if (vect_print_dump_info (REPORT_DETAILS))
4642 fprintf (vect_dump, "Unordered widening operation detected.");
4643
4644 *code1 = *code2 = CALL_EXPR;
4645 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4646 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4647 return true;
4648 }
4649
4650 switch (code)
4651 {
4652 case WIDEN_MULT_EXPR:
4653 if (BYTES_BIG_ENDIAN)
4654 {
4655 c1 = VEC_WIDEN_MULT_HI_EXPR;
4656 c2 = VEC_WIDEN_MULT_LO_EXPR;
4657 }
4658 else
4659 {
4660 c2 = VEC_WIDEN_MULT_HI_EXPR;
4661 c1 = VEC_WIDEN_MULT_LO_EXPR;
4662 }
4663 break;
4664
4665 CASE_CONVERT:
4666 if (BYTES_BIG_ENDIAN)
4667 {
4668 c1 = VEC_UNPACK_HI_EXPR;
4669 c2 = VEC_UNPACK_LO_EXPR;
4670 }
4671 else
4672 {
4673 c2 = VEC_UNPACK_HI_EXPR;
4674 c1 = VEC_UNPACK_LO_EXPR;
4675 }
4676 break;
4677
4678 case FLOAT_EXPR:
4679 if (BYTES_BIG_ENDIAN)
4680 {
4681 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4682 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4683 }
4684 else
4685 {
4686 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4687 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4688 }
4689 break;
4690
4691 case FIX_TRUNC_EXPR:
4692 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4693 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4694 computing the operation. */
4695 return false;
4696
4697 default:
4698 gcc_unreachable ();
4699 }
4700
4701 if (code == FIX_TRUNC_EXPR)
4702 {
4703 /* The signedness is determined from the output operand. */
4704 optab1 = optab_for_tree_code (c1, type, optab_default);
4705 optab2 = optab_for_tree_code (c2, type, optab_default);
4706 }
4707 else
4708 {
4709 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4710 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4711 }
4712
4713 if (!optab1 || !optab2)
4714 return false;
4715
4716 vec_mode = TYPE_MODE (vectype);
4717 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4718 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4719 == CODE_FOR_nothing)
4720 return false;
4721
4722 /* Check if it's a multi-step conversion that can be done using intermediate
4723 types. */
4724 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4725 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4726 {
4727 int i;
4728 tree prev_type = vectype, intermediate_type;
4729 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4730 optab optab3, optab4;
4731
4732 if (!CONVERT_EXPR_CODE_P (code))
4733 return false;
4734
4735 *code1 = c1;
4736 *code2 = c2;
4737
4738 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4739 intermediate steps in the promotion sequence. We try MAX_INTERM_CVT_STEPS
4740 to get to WIDE_VECTYPE, and fail if we do not. */
4741 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4742 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
4743 {
4744 intermediate_mode = insn_data[icode1].operand[0].mode;
4745 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4746 TYPE_UNSIGNED (prev_type));
4747 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4748 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4749
4750 if (!optab3 || !optab4
4751 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4752 == CODE_FOR_nothing
4753 || insn_data[icode1].operand[0].mode != intermediate_mode
4754 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4755 == CODE_FOR_nothing
4756 || insn_data[icode2].operand[0].mode != intermediate_mode
4757 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4758 == CODE_FOR_nothing
4759 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4760 == CODE_FOR_nothing)
4761 return false;
4762
4763 VEC_quick_push (tree, *interm_types, intermediate_type);
4764 (*multi_step_cvt)++;
4765
4766 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4767 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4768 return true;
4769
4770 prev_type = intermediate_type;
4771 prev_mode = intermediate_mode;
4772 }
4773
4774 return false;
4775 }
4776
4777 *code1 = c1;
4778 *code2 = c2;
4779 return true;
4780 }
4781
4782
4783 /* Function supportable_narrowing_operation
4784
4785 Check whether an operation represented by the code CODE is a
4786 narrowing operation that is supported by the target platform in
4787 vector form (i.e., when operating on arguments of type VECTYPE).
4788
4789 Narrowing operations we currently support are NOP (CONVERT) and
4790 FIX_TRUNC. This function checks if these operations are supported by
4791 the target platform directly via vector tree-codes.
4792
4793 Output:
4794 - CODE1 is the code of a vector operation to be used when
4795 vectorizing the operation, if available.
4796 - MULTI_STEP_CVT determines the number of required intermediate steps in
4797 case of multi-step conversion (like int->short->char - in that case
4798 MULTI_STEP_CVT will be 1).
4799 - INTERM_TYPES contains the intermediate type required to perform the
4800 narrowing operation (short in the above example). */
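
/* A schematic multi-step example (assuming, for illustration only, a
   target that supports V4SI, V8HI and V16QI but has no single-step
   int->char packing):

     int -> char needs one intermediate type (short):

       step 1:  v8hi_0 = VEC_PACK_TRUNC_EXPR <v4si_0, v4si_1>
                v8hi_1 = VEC_PACK_TRUNC_EXPR <v4si_2, v4si_3>
       step 2:  v16qi  = VEC_PACK_TRUNC_EXPR <v8hi_0, v8hi_1>

   in which case MULTI_STEP_CVT is 1 and INTERM_TYPES contains the single
   intermediate vector type (V8HI).  */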
4801
4802 bool
4803 supportable_narrowing_operation (enum tree_code code,
4804 const_gimple stmt, tree vectype,
4805 enum tree_code *code1, int *multi_step_cvt,
4806 VEC (tree, heap) **interm_types)
4807 {
4808 enum machine_mode vec_mode;
4809 enum insn_code icode1;
4810 optab optab1, interm_optab;
4811 tree type = gimple_expr_type (stmt);
4812 tree narrow_vectype = get_vectype_for_scalar_type (type);
4813 enum tree_code c1;
4814 tree intermediate_type, prev_type;
4815 int i;
4816
4817 switch (code)
4818 {
4819 CASE_CONVERT:
4820 c1 = VEC_PACK_TRUNC_EXPR;
4821 break;
4822
4823 case FIX_TRUNC_EXPR:
4824 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4825 break;
4826
4827 case FLOAT_EXPR:
4828 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4829 tree code and optabs used for computing the operation. */
4830 return false;
4831
4832 default:
4833 gcc_unreachable ();
4834 }
4835
4836 if (code == FIX_TRUNC_EXPR)
4837 /* The signedness is determined from the output operand. */
4838 optab1 = optab_for_tree_code (c1, type, optab_default);
4839 else
4840 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4841
4842 if (!optab1)
4843 return false;
4844
4845 vec_mode = TYPE_MODE (vectype);
4846 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4847 == CODE_FOR_nothing)
4848 return false;
4849
4850 /* Check if it's a multi-step conversion that can be done using intermediate
4851 types. */
4852 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4853 {
4854 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4855
4856 *code1 = c1;
4857 prev_type = vectype;
4858 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4859 intermediate steps in the narrowing sequence. We try MAX_INTERM_CVT_STEPS
4860 to get to NARROW_VECTYPE, and fail if we do not. */
4861 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4862 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
4863 {
4864 intermediate_mode = insn_data[icode1].operand[0].mode;
4865 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4866 TYPE_UNSIGNED (prev_type));
4867 interm_optab = optab_for_tree_code (c1, intermediate_type,
4868 optab_default);
4869 if (!interm_optab
4870 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4871 == CODE_FOR_nothing
4872 || insn_data[icode1].operand[0].mode != intermediate_mode
4873 || (icode1
4874 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4875 == CODE_FOR_nothing)
4876 return false;
4877
4878 VEC_quick_push (tree, *interm_types, intermediate_type);
4879 (*multi_step_cvt)++;
4880
4881 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4882 return true;
4883
4884 prev_type = intermediate_type;
4885 prev_mode = intermediate_mode;
4886 }
4887
4888 return false;
4889 }
4890
4891 *code1 = c1;
4892 return true;
4893 }