gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "diagnostic-core.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42
43
44 /* Return the vectorized type for the given statement. */
45
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
48 {
49 return STMT_VINFO_VECTYPE (stmt_info);
50 }
51
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
56 {
57 gimple stmt = STMT_VINFO_STMT (stmt_info);
58 basic_block bb = gimple_bb (stmt);
59 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60 struct loop* loop;
61
62 if (!loop_vinfo)
63 return false;
64
65 loop = LOOP_VINFO_LOOP (loop_vinfo);
66
67 return (bb->loop_father == loop->inner);
68 }
69
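/* Editorial note (not part of the original file): a minimal sketch of the
   situation stmt_in_inner_loop_p tests for.  When the outer loop below is
   the loop being vectorized, statement S1 sits in loop->inner and the
   predicate returns true for it; all names here are hypothetical.  */
#if 0
void
example_outer_loop_vect (float a[][64], float *b, int n)
{
  int i, j;
  for (i = 0; i < n; i++)       /* outer loop: the loop being vectorized */
    {
      float s = 0.0f;
      for (j = 0; j < 64; j++)  /* inner loop relative to it */
        s += a[i][j];           /* S1: stmt_in_inner_loop_p would be true */
      b[i] = s;                 /* S2: in the outer loop body, so false */
    }
}
#endif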
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
73
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *stmt_cost_vec, int count,
76 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 int misalign)
78 {
79 if (stmt_cost_vec)
80 {
81 tree vectype = stmt_vectype (stmt_info);
82 add_stmt_info_to_vec (stmt_cost_vec, count, kind,
83 STMT_VINFO_STMT (stmt_info), misalign);
84 return (unsigned)
85 (targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign)
86 * count);
87
88 }
89 else
90 {
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93 void *target_cost_data;
94
95 if (loop_vinfo)
96 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97 else
98 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
99
100 return add_stmt_cost (target_cost_data, count, kind, stmt_info, misalign);
101 }
102 }
103
104 /* Return a variable of type ELEM_TYPE[NELEMS]. */
105
106 static tree
107 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
108 {
109 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
110 "vect_array");
111 }
112
113 /* ARRAY is an array of vectors created by create_vector_array.
114 Return an SSA_NAME for the vector in index N. The reference
115 is part of the vectorization of STMT and the vector is associated
116 with scalar destination SCALAR_DEST. */
117
118 static tree
119 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
120 tree array, unsigned HOST_WIDE_INT n)
121 {
122 tree vect_type, vect, vect_name, array_ref;
123 gimple new_stmt;
124
125 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
126 vect_type = TREE_TYPE (TREE_TYPE (array));
127 vect = vect_create_destination_var (scalar_dest, vect_type);
128 array_ref = build4 (ARRAY_REF, vect_type, array,
129 build_int_cst (size_type_node, n),
130 NULL_TREE, NULL_TREE);
131
132 new_stmt = gimple_build_assign (vect, array_ref);
133 vect_name = make_ssa_name (vect, new_stmt);
134 gimple_assign_set_lhs (new_stmt, vect_name);
135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
136
137 return vect_name;
138 }
139
140 /* ARRAY is an array of vectors created by create_vector_array.
141 Emit code to store SSA_NAME VECT in index N of the array.
142 The store is part of the vectorization of STMT. */
143
144 static void
145 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
146 tree array, unsigned HOST_WIDE_INT n)
147 {
148 tree array_ref;
149 gimple new_stmt;
150
151 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
152 build_int_cst (size_type_node, n),
153 NULL_TREE, NULL_TREE);
154
155 new_stmt = gimple_build_assign (array_ref, vect);
156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
157 }
158
159 /* PTR is a pointer to an array of type TYPE. Return a representation
160 of *PTR. The memory reference replaces those in FIRST_DR
161 (and its group). */
162
163 static tree
164 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
165 {
166 tree mem_ref, alias_ptr_type;
167
168 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
169 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
170 /* Arrays have the same alignment as their type. */
171 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
172 return mem_ref;
173 }
174
175 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
176
177 /* Function vect_mark_relevant.
178
179 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
180
181 static void
182 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
183 enum vect_relevant relevant, bool live_p,
184 bool used_in_pattern)
185 {
186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
187 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
188 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
189 gimple pattern_stmt;
190
191 if (vect_print_dump_info (REPORT_DETAILS))
192 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
193
194 /* If this stmt is an original stmt in a pattern, we might need to mark its
195 related pattern stmt instead of the original stmt. However, such stmts
 196    may have their own uses that are not in any pattern; in such cases the
197 stmt itself should be marked. */
198 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
199 {
200 bool found = false;
201 if (!used_in_pattern)
202 {
203 imm_use_iterator imm_iter;
204 use_operand_p use_p;
205 gimple use_stmt;
206 tree lhs;
207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
208 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
209
210 if (is_gimple_assign (stmt))
211 lhs = gimple_assign_lhs (stmt);
212 else
213 lhs = gimple_call_lhs (stmt);
214
 215           /* This use is not a pattern use; if LHS has other uses that are
216 pattern uses, we should mark the stmt itself, and not the pattern
217 stmt. */
218 if (TREE_CODE (lhs) == SSA_NAME)
219 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
220 {
221 if (is_gimple_debug (USE_STMT (use_p)))
222 continue;
223 use_stmt = USE_STMT (use_p);
224
225 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
226 continue;
227
228 if (vinfo_for_stmt (use_stmt)
229 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
230 {
231 found = true;
232 break;
233 }
234 }
235 }
236
237 if (!found)
238 {
239 /* This is the last stmt in a sequence that was detected as a
240 pattern that can potentially be vectorized. Don't mark the stmt
241 as relevant/live because it's not going to be vectorized.
242 Instead mark the pattern-stmt that replaces it. */
243
244 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
245
246 if (vect_print_dump_info (REPORT_DETAILS))
247 fprintf (vect_dump, "last stmt in pattern. don't mark"
248 " relevant/live.");
249 stmt_info = vinfo_for_stmt (pattern_stmt);
250 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
251 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
252 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
253 stmt = pattern_stmt;
254 }
255 }
256
257 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
258 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
259 STMT_VINFO_RELEVANT (stmt_info) = relevant;
260
261 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
262 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
263 {
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "already marked relevant/live.");
266 return;
267 }
268
269 VEC_safe_push (gimple, heap, *worklist, stmt);
270 }
271
272
273 /* Function vect_stmt_relevant_p.
274
275 Return true if STMT in loop that is represented by LOOP_VINFO is
276 "relevant for vectorization".
277
278 A stmt is considered "relevant for vectorization" if:
279 - it has uses outside the loop.
280 - it has vdefs (it alters memory).
 281    - it is a control stmt in the loop (except for the exit condition).
282
283 CHECKME: what other side effects would the vectorizer allow? */
284
285 static bool
286 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
287 enum vect_relevant *relevant, bool *live_p)
288 {
289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
290 ssa_op_iter op_iter;
291 imm_use_iterator imm_iter;
292 use_operand_p use_p;
293 def_operand_p def_p;
294
295 *relevant = vect_unused_in_scope;
296 *live_p = false;
297
298 /* cond stmt other than loop exit cond. */
299 if (is_ctrl_stmt (stmt)
300 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
301 != loop_exit_ctrl_vec_info_type)
302 *relevant = vect_used_in_scope;
303
304 /* changing memory. */
305 if (gimple_code (stmt) != GIMPLE_PHI)
306 if (gimple_vdef (stmt))
307 {
308 if (vect_print_dump_info (REPORT_DETAILS))
309 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
310 *relevant = vect_used_in_scope;
311 }
312
313 /* uses outside the loop. */
314 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
315 {
316 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
317 {
318 basic_block bb = gimple_bb (USE_STMT (use_p));
319 if (!flow_bb_inside_loop_p (loop, bb))
320 {
321 if (vect_print_dump_info (REPORT_DETAILS))
322 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
323
324 if (is_gimple_debug (USE_STMT (use_p)))
325 continue;
326
327 /* We expect all such uses to be in the loop exit phis
 328              (because of loop-closed SSA form).  */
329 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
330 gcc_assert (bb == single_exit (loop)->dest);
331
332 *live_p = true;
333 }
334 }
335 }
336
337 return (*live_p || *relevant);
338 }
339
340
341 /* Function exist_non_indexing_operands_for_use_p
342
343 USE is one of the uses attached to STMT. Check if USE is
344 used in STMT for anything other than indexing an array. */
345
346 static bool
347 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
348 {
349 tree operand;
350 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
351
352 /* USE corresponds to some operand in STMT. If there is no data
353 reference in STMT, then any operand that corresponds to USE
354 is not indexing an array. */
355 if (!STMT_VINFO_DATA_REF (stmt_info))
356 return true;
357
 358   /* STMT has a data_ref. FORNOW this means that it is one of
359 the following forms:
360 -1- ARRAY_REF = var
361 -2- var = ARRAY_REF
362 (This should have been verified in analyze_data_refs).
363
364 'var' in the second case corresponds to a def, not a use,
365 so USE cannot correspond to any operands that are not used
366 for array indexing.
367
368 Therefore, all we need to check is if STMT falls into the
369 first case, and whether var corresponds to USE. */
370
371 if (!gimple_assign_copy_p (stmt))
372 return false;
373 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
374 return false;
375 operand = gimple_assign_rhs1 (stmt);
376 if (TREE_CODE (operand) != SSA_NAME)
377 return false;
378
379 if (operand == use)
380 return true;
381
382 return false;
383 }
384
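/* Editorial note (not part of the original file): a hedged sketch of the
   distinction exist_non_indexing_operands_for_use_p (above) draws.  In S1
   below the use of `i' only indexes the array, so such a use would not count
   as a non-indexing operand; in S2 the loaded value `t' is used as a real
   operand of the addition.  Names are illustrative only.  */
#if 0
void
example_indexing_use (int *a, int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      int t = a[i];     /* S1: `i' is used only for array indexing */
      b[i] = t + 1;     /* S2: `t' is a non-indexing operand; `i' again only indexes */
    }
}
#endif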
385
386 /*
387 Function process_use.
388
389 Inputs:
390 - a USE in STMT in a loop represented by LOOP_VINFO
391 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
392 that defined USE. This is done by calling mark_relevant and passing it
393 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
394 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
395 be performed.
396
397 Outputs:
398 Generally, LIVE_P and RELEVANT are used to define the liveness and
399 relevance info of the DEF_STMT of this USE:
400 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
401 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
402 Exceptions:
403 - case 1: If USE is used only for address computations (e.g. array indexing),
404 which does not need to be directly vectorized, then the liveness/relevance
405 of the respective DEF_STMT is left unchanged.
406 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 407      skip DEF_STMT because it has already been processed.
408 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
409 be modified accordingly.
410
411 Return true if everything is as expected. Return false otherwise. */
412
413 static bool
414 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
415 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
416 bool force)
417 {
418 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
419 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
420 stmt_vec_info dstmt_vinfo;
421 basic_block bb, def_bb;
422 tree def;
423 gimple def_stmt;
424 enum vect_def_type dt;
425
426 /* case 1: we are only interested in uses that need to be vectorized. Uses
427 that are used for address computation are not considered relevant. */
428 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
429 return true;
430
431 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
432 {
433 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
434 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
435 return false;
436 }
437
438 if (!def_stmt || gimple_nop_p (def_stmt))
439 return true;
440
441 def_bb = gimple_bb (def_stmt);
442 if (!flow_bb_inside_loop_p (loop, def_bb))
443 {
444 if (vect_print_dump_info (REPORT_DETAILS))
445 fprintf (vect_dump, "def_stmt is out of loop.");
446 return true;
447 }
448
449 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
450 DEF_STMT must have already been processed, because this should be the
451 only way that STMT, which is a reduction-phi, was put in the worklist,
452 as there should be no other uses for DEF_STMT in the loop. So we just
453 check that everything is as expected, and we are done. */
454 dstmt_vinfo = vinfo_for_stmt (def_stmt);
455 bb = gimple_bb (stmt);
456 if (gimple_code (stmt) == GIMPLE_PHI
457 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
458 && gimple_code (def_stmt) != GIMPLE_PHI
459 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
460 && bb->loop_father == def_bb->loop_father)
461 {
462 if (vect_print_dump_info (REPORT_DETAILS))
463 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
464 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
465 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
466 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
467 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
468 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
469 return true;
470 }
471
472 /* case 3a: outer-loop stmt defining an inner-loop stmt:
473 outer-loop-header-bb:
474 d = def_stmt
475 inner-loop:
476 stmt # use (d)
477 outer-loop-tail-bb:
478 ... */
479 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
480 {
481 if (vect_print_dump_info (REPORT_DETAILS))
482 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
483
484 switch (relevant)
485 {
486 case vect_unused_in_scope:
487 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
488 vect_used_in_scope : vect_unused_in_scope;
489 break;
490
491 case vect_used_in_outer_by_reduction:
492 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
493 relevant = vect_used_by_reduction;
494 break;
495
496 case vect_used_in_outer:
497 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
498 relevant = vect_used_in_scope;
499 break;
500
501 case vect_used_in_scope:
502 break;
503
504 default:
505 gcc_unreachable ();
506 }
507 }
508
509 /* case 3b: inner-loop stmt defining an outer-loop stmt:
510 outer-loop-header-bb:
511 ...
512 inner-loop:
513 d = def_stmt
514 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
515 stmt # use (d) */
516 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
517 {
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
520
521 switch (relevant)
522 {
523 case vect_unused_in_scope:
524 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
525 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
526 vect_used_in_outer_by_reduction : vect_unused_in_scope;
527 break;
528
529 case vect_used_by_reduction:
530 relevant = vect_used_in_outer_by_reduction;
531 break;
532
533 case vect_used_in_scope:
534 relevant = vect_used_in_outer;
535 break;
536
537 default:
538 gcc_unreachable ();
539 }
540 }
541
542 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
543 is_pattern_stmt_p (stmt_vinfo));
544 return true;
545 }
546
547
548 /* Function vect_mark_stmts_to_be_vectorized.
549
550 Not all stmts in the loop need to be vectorized. For example:
551
552 for i...
553 for j...
554 1. T0 = i + j
555 2. T1 = a[T0]
556
557 3. j = j + 1
558
 559    Stmts 1 and 3 do not need to be vectorized, because loop control and
560 addressing of vectorized data-refs are handled differently.
561
562 This pass detects such stmts. */
563
564 bool
565 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
566 {
567 VEC(gimple,heap) *worklist;
568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
569 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
570 unsigned int nbbs = loop->num_nodes;
571 gimple_stmt_iterator si;
572 gimple stmt;
573 unsigned int i;
574 stmt_vec_info stmt_vinfo;
575 basic_block bb;
576 gimple phi;
577 bool live_p;
578 enum vect_relevant relevant, tmp_relevant;
579 enum vect_def_type def_type;
580
581 if (vect_print_dump_info (REPORT_DETAILS))
582 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
583
584 worklist = VEC_alloc (gimple, heap, 64);
585
586 /* 1. Init worklist. */
587 for (i = 0; i < nbbs; i++)
588 {
589 bb = bbs[i];
590 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
591 {
592 phi = gsi_stmt (si);
593 if (vect_print_dump_info (REPORT_DETAILS))
594 {
595 fprintf (vect_dump, "init: phi relevant? ");
596 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
597 }
598
599 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
600 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
601 }
602 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
603 {
604 stmt = gsi_stmt (si);
605 if (vect_print_dump_info (REPORT_DETAILS))
606 {
607 fprintf (vect_dump, "init: stmt relevant? ");
608 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
609 }
610
611 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
612 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
613 }
614 }
615
616 /* 2. Process_worklist */
617 while (VEC_length (gimple, worklist) > 0)
618 {
619 use_operand_p use_p;
620 ssa_op_iter iter;
621
622 stmt = VEC_pop (gimple, worklist);
623 if (vect_print_dump_info (REPORT_DETAILS))
624 {
625 fprintf (vect_dump, "worklist: examine stmt: ");
626 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
627 }
628
629 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
630 (DEF_STMT) as relevant/irrelevant and live/dead according to the
631 liveness and relevance properties of STMT. */
632 stmt_vinfo = vinfo_for_stmt (stmt);
633 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
634 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
635
636 /* Generally, the liveness and relevance properties of STMT are
637 propagated as is to the DEF_STMTs of its USEs:
638 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
639 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
640
641 One exception is when STMT has been identified as defining a reduction
642 variable; in this case we set the liveness/relevance as follows:
643 live_p = false
644 relevant = vect_used_by_reduction
645 This is because we distinguish between two kinds of relevant stmts -
646 those that are used by a reduction computation, and those that are
647 (also) used by a regular computation. This allows us later on to
648 identify stmts that are used solely by a reduction, and therefore the
649 order of the results that they produce does not have to be kept. */
650
651 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
652 tmp_relevant = relevant;
653 switch (def_type)
654 {
655 case vect_reduction_def:
656 switch (tmp_relevant)
657 {
658 case vect_unused_in_scope:
659 relevant = vect_used_by_reduction;
660 break;
661
662 case vect_used_by_reduction:
663 if (gimple_code (stmt) == GIMPLE_PHI)
664 break;
665 /* fall through */
666
667 default:
668 if (vect_print_dump_info (REPORT_DETAILS))
669 fprintf (vect_dump, "unsupported use of reduction.");
670
671 VEC_free (gimple, heap, worklist);
672 return false;
673 }
674
675 live_p = false;
676 break;
677
678 case vect_nested_cycle:
679 if (tmp_relevant != vect_unused_in_scope
680 && tmp_relevant != vect_used_in_outer_by_reduction
681 && tmp_relevant != vect_used_in_outer)
682 {
683 if (vect_print_dump_info (REPORT_DETAILS))
684 fprintf (vect_dump, "unsupported use of nested cycle.");
685
686 VEC_free (gimple, heap, worklist);
687 return false;
688 }
689
690 live_p = false;
691 break;
692
693 case vect_double_reduction_def:
694 if (tmp_relevant != vect_unused_in_scope
695 && tmp_relevant != vect_used_by_reduction)
696 {
697 if (vect_print_dump_info (REPORT_DETAILS))
698 fprintf (vect_dump, "unsupported use of double reduction.");
699
700 VEC_free (gimple, heap, worklist);
701 return false;
702 }
703
704 live_p = false;
705 break;
706
707 default:
708 break;
709 }
710
711 if (is_pattern_stmt_p (stmt_vinfo))
712 {
713 /* Pattern statements are not inserted into the code, so
714 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
715 have to scan the RHS or function arguments instead. */
716 if (is_gimple_assign (stmt))
717 {
718 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
719 tree op = gimple_assign_rhs1 (stmt);
720
721 i = 1;
722 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 {
724 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
725 live_p, relevant, &worklist, false)
726 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
727 live_p, relevant, &worklist, false))
728 {
729 VEC_free (gimple, heap, worklist);
730 return false;
731 }
732 i = 2;
733 }
734 for (; i < gimple_num_ops (stmt); i++)
735 {
736 op = gimple_op (stmt, i);
737 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
738 &worklist, false))
739 {
740 VEC_free (gimple, heap, worklist);
741 return false;
742 }
743 }
744 }
745 else if (is_gimple_call (stmt))
746 {
747 for (i = 0; i < gimple_call_num_args (stmt); i++)
748 {
749 tree arg = gimple_call_arg (stmt, i);
750 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
751 &worklist, false))
752 {
753 VEC_free (gimple, heap, worklist);
754 return false;
755 }
756 }
757 }
758 }
759 else
760 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
761 {
762 tree op = USE_FROM_PTR (use_p);
763 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
764 &worklist, false))
765 {
766 VEC_free (gimple, heap, worklist);
767 return false;
768 }
769 }
770
771 if (STMT_VINFO_GATHER_P (stmt_vinfo))
772 {
773 tree off;
774 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
775 gcc_assert (decl);
776 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
777 &worklist, true))
778 {
779 VEC_free (gimple, heap, worklist);
780 return false;
781 }
782 }
783 } /* while worklist */
784
785 VEC_free (gimple, heap, worklist);
786 return true;
787 }
788
789
790 /* Function vect_model_simple_cost.
791
792 Models cost for simple operations, i.e. those that only emit ncopies of a
793 single op. Right now, this does not account for multiple insns that could
794 be generated for the single vector op. We will handle that shortly. */
795
796 void
797 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
798 enum vect_def_type *dt, slp_tree slp_node,
799 stmt_vector_for_cost *stmt_cost_vec)
800 {
801 int i;
802 int inside_cost = 0, outside_cost = 0;
803
804 /* The SLP costs were already calculated during SLP tree build. */
805 if (PURE_SLP_STMT (stmt_info))
806 return;
807
 808   /* FORNOW: Assuming maximum 2 args per stmt.  */
809 for (i = 0; i < 2; i++)
810 {
811 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
812 outside_cost += vect_get_stmt_cost (vector_stmt);
813 }
814
815 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
816 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
817
818 /* Pass the inside-of-loop statements to the target-specific cost model. */
819 inside_cost = record_stmt_cost (stmt_cost_vec, ncopies, vector_stmt,
820 stmt_info, 0);
821
822 if (vect_print_dump_info (REPORT_COST))
823 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
824 "outside_cost = %d .", inside_cost, outside_cost);
825 }
826
827
828 /* Model cost for type demotion and promotion operations. PWR is normally
829 zero for single-step promotions and demotions. It will be one if
830 two-step promotion/demotion is required, and so on. Each additional
831 step doubles the number of instructions required. */
832
833 static void
834 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
835 enum vect_def_type *dt, int pwr)
836 {
837 int i, tmp;
838 int inside_cost = 0, outside_cost = 0;
839 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
840 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
841 void *target_cost_data;
842
843 /* The SLP costs were already calculated during SLP tree build. */
844 if (PURE_SLP_STMT (stmt_info))
845 return;
846
847 if (loop_vinfo)
848 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
849 else
850 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
851
852 for (i = 0; i < pwr + 1; i++)
853 {
854 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
855 (i + 1) : i;
856 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
857 vec_promote_demote, stmt_info, 0);
858 }
859
 860   /* FORNOW: Assuming maximum 2 args per stmt.  */
861 for (i = 0; i < 2; i++)
862 {
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 outside_cost += vect_get_stmt_cost (vector_stmt);
865 }
866
867 if (vect_print_dump_info (REPORT_COST))
868 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
869 "outside_cost = %d .", inside_cost, outside_cost);
870
871 /* Set the costs in STMT_INFO. */
872 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
873 }
874
875 /* Function vect_cost_group_size
876
877 For grouped load or store, return the group_size only if it is the first
878 load or store of a group, else return 1. This ensures that group size is
879 only returned once per group. */
880
881 static int
882 vect_cost_group_size (stmt_vec_info stmt_info)
883 {
884 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
885
886 if (first_stmt == STMT_VINFO_STMT (stmt_info))
887 return GROUP_SIZE (stmt_info);
888
889 return 1;
890 }
891
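/* Editorial note (not part of the original file): an illustrative example of
   the grouped stores that vect_cost_group_size (above) and
   vect_model_store_cost (below) reason about.  The two stores form one
   interleaving group of size 2; vect_cost_group_size returns 2 only for the
   first store of the group and 1 for the other, so the permute overhead is
   counted once per group.  Names are hypothetical.  */
#if 0
void
example_grouped_store (float *out, const float *a, const float *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      out[2 * i] = a[i];        /* first store of the group (GROUP_SIZE 2) */
      out[2 * i + 1] = b[i];    /* second store of the same group */
    }
}
#endif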
892
893 /* Function vect_model_store_cost
894
895 Models cost for stores. In the case of grouped accesses, one access
896 has the overhead of the grouped access attributed to it. */
897
898 void
899 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
900 bool store_lanes_p, enum vect_def_type dt,
901 slp_tree slp_node, stmt_vector_for_cost *stmt_cost_vec)
902 {
903 int group_size;
904 unsigned int inside_cost = 0, outside_cost = 0;
905 struct data_reference *first_dr;
906 gimple first_stmt;
907
908 /* The SLP costs were already calculated during SLP tree build. */
909 if (PURE_SLP_STMT (stmt_info))
910 return;
911
912 if (dt == vect_constant_def || dt == vect_external_def)
913 outside_cost = vect_get_stmt_cost (scalar_to_vec);
914
915 /* Grouped access? */
916 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
917 {
918 if (slp_node)
919 {
920 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
921 group_size = 1;
922 }
923 else
924 {
925 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
926 group_size = vect_cost_group_size (stmt_info);
927 }
928
929 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
930 }
931 /* Not a grouped access. */
932 else
933 {
934 group_size = 1;
935 first_dr = STMT_VINFO_DATA_REF (stmt_info);
936 }
937
938 /* We assume that the cost of a single store-lanes instruction is
939 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
940 access is instead being provided by a permute-and-store operation,
941 include the cost of the permutes. */
942 if (!store_lanes_p && group_size > 1)
943 {
944 /* Uses a high and low interleave operation for each needed permute. */
945
946 int nstmts = ncopies * exact_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (stmt_cost_vec, nstmts, vec_perm,
948 stmt_info, 0);
949
950 if (vect_print_dump_info (REPORT_COST))
951 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
952 group_size);
953 }
954
955 /* Costs of the stores. */
956 vect_get_store_cost (first_dr, ncopies, &inside_cost, stmt_cost_vec);
957
958 if (vect_print_dump_info (REPORT_COST))
959 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
960 "outside_cost = %d .", inside_cost, outside_cost);
961
962 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
963 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
964 }
965
966
967 /* Calculate cost of DR's memory access. */
968 void
969 vect_get_store_cost (struct data_reference *dr, int ncopies,
970 unsigned int *inside_cost,
971 stmt_vector_for_cost *stmt_cost_vec)
972 {
973 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
974 gimple stmt = DR_STMT (dr);
975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
976
977 switch (alignment_support_scheme)
978 {
979 case dr_aligned:
980 {
981 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
982 vector_store, stmt_info, 0);
983
984 if (vect_print_dump_info (REPORT_COST))
985 fprintf (vect_dump, "vect_model_store_cost: aligned.");
986
987 break;
988 }
989
990 case dr_unaligned_supported:
991 {
992 /* Here, we assign an additional cost for the unaligned store. */
993 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
994 unaligned_store, stmt_info,
995 DR_MISALIGNMENT (dr));
996
997 if (vect_print_dump_info (REPORT_COST))
998 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
999 "hardware.");
1000
1001 break;
1002 }
1003
1004 case dr_unaligned_unsupported:
1005 {
1006 *inside_cost = VECT_MAX_COST;
1007
1008 if (vect_print_dump_info (REPORT_COST))
1009 fprintf (vect_dump, "vect_model_store_cost: unsupported access.");
1010
1011 break;
1012 }
1013
1014 default:
1015 gcc_unreachable ();
1016 }
1017 }
1018
1019
1020 /* Function vect_model_load_cost
1021
1022 Models cost for loads. In the case of grouped accesses, the last access
1023 has the overhead of the grouped access attributed to it. Since unaligned
1024 accesses are supported for loads, we also account for the costs of the
1025 access scheme chosen. */
1026
1027 void
1028 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
1029 slp_tree slp_node, stmt_vector_for_cost *stmt_cost_vec)
1030 {
1031 int group_size;
1032 gimple first_stmt;
1033 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1034 unsigned int inside_cost = 0, outside_cost = 0;
1035
1036 /* The SLP costs were already calculated during SLP tree build. */
1037 if (PURE_SLP_STMT (stmt_info))
1038 return;
1039
1040 /* Grouped accesses? */
1041 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1042 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1043 {
1044 group_size = vect_cost_group_size (stmt_info);
1045 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1046 }
1047 /* Not a grouped access. */
1048 else
1049 {
1050 group_size = 1;
1051 first_dr = dr;
1052 }
1053
1054 /* We assume that the cost of a single load-lanes instruction is
1055 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1056 access is instead being provided by a load-and-permute operation,
1057 include the cost of the permutes. */
1058 if (!load_lanes_p && group_size > 1)
1059 {
1060       /* Uses even and odd extract operations for each needed permute.  */
1061 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1062 inside_cost += record_stmt_cost (stmt_cost_vec, nstmts, vec_perm,
1063 stmt_info, 0);
1064
1065 if (vect_print_dump_info (REPORT_COST))
1066 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1067 group_size);
1068 }
1069
1070 /* The loads themselves. */
1071 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1072 {
1073 /* N scalar loads plus gathering them into a vector. */
1074 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1075 inside_cost += record_stmt_cost (stmt_cost_vec,
1076 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1077 scalar_load, stmt_info, 0);
1078 inside_cost += record_stmt_cost (stmt_cost_vec, ncopies, vec_construct,
1079 stmt_info, 0);
1080 }
1081 else
1082 vect_get_load_cost (first_dr, ncopies,
1083 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1084 || group_size > 1 || slp_node),
1085 &inside_cost, &outside_cost, stmt_cost_vec);
1086
1087 if (vect_print_dump_info (REPORT_COST))
1088 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1089 "outside_cost = %d .", inside_cost, outside_cost);
1090
1091 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1092 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1093 }
1094
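/* Editorial note (not part of the original file): a sketch of the kind of
   strided load that the STMT_VINFO_STRIDE_LOAD_P branch above costs as
   TYPE_VECTOR_SUBPARTS scalar loads plus one vec_construct per copy: the
   elements are not consecutive in memory, so they are loaded one by one and
   then assembled into a vector.  The stride and names are illustrative.  */
#if 0
float
example_strided_load (const float *a, int n, int stride)
{
  int i;
  float sum = 0.0f;
  for (i = 0; i < n; i++)
    sum += a[i * stride];       /* non-consecutive elements of `a' */
  return sum;
}
#endif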
1095
1096 /* Calculate cost of DR's memory access. */
1097 void
1098 vect_get_load_cost (struct data_reference *dr, int ncopies,
1099 bool add_realign_cost, unsigned int *inside_cost,
1100 unsigned int *outside_cost,
1101 stmt_vector_for_cost *stmt_cost_vec)
1102 {
1103 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1104 gimple stmt = DR_STMT (dr);
1105 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1106
1107 switch (alignment_support_scheme)
1108 {
1109 case dr_aligned:
1110 {
1111 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1112 vector_load, stmt_info, 0);
1113
1114 if (vect_print_dump_info (REPORT_COST))
1115 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1116
1117 break;
1118 }
1119 case dr_unaligned_supported:
1120 {
1121 /* Here, we assign an additional cost for the unaligned load. */
1122 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1123 unaligned_load, stmt_info,
1124 DR_MISALIGNMENT (dr));
1125
1126 if (vect_print_dump_info (REPORT_COST))
1127 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1128 "hardware.");
1129
1130 break;
1131 }
1132 case dr_explicit_realign:
1133 {
1134 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies * 2,
1135 vector_load, stmt_info, 0);
1136 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1137 vec_perm, stmt_info, 0);
1138
1139 /* FIXME: If the misalignment remains fixed across the iterations of
1140 the containing loop, the following cost should be added to the
1141 outside costs. */
1142 if (targetm.vectorize.builtin_mask_for_load)
1143 *inside_cost += record_stmt_cost (stmt_cost_vec, 1, vector_stmt,
1144 stmt_info, 0);
1145
1146 if (vect_print_dump_info (REPORT_COST))
1147 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1148
1149 break;
1150 }
1151 case dr_explicit_realign_optimized:
1152 {
1153 if (vect_print_dump_info (REPORT_COST))
1154 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1155 "pipelined.");
1156
1157 /* Unaligned software pipeline has a load of an address, an initial
1158 load, and possibly a mask operation to "prime" the loop. However,
1159 if this is an access in a group of loads, which provide grouped
1160 access, then the above cost should only be considered for one
1161 access in the group. Inside the loop, there is a load op
1162 and a realignment op. */
1163
1164 if (add_realign_cost)
1165 {
1166 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1167 if (targetm.vectorize.builtin_mask_for_load)
1168 *outside_cost += vect_get_stmt_cost (vector_stmt);
1169 }
1170
1171 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1172 vector_load, stmt_info, 0);
1173 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1174 vec_perm, stmt_info, 0);
1175
1176 if (vect_print_dump_info (REPORT_COST))
1177 fprintf (vect_dump,
1178 "vect_model_load_cost: explicit realign optimized");
1179
1180 break;
1181 }
1182
1183 case dr_unaligned_unsupported:
1184 {
1185 *inside_cost = VECT_MAX_COST;
1186
1187 if (vect_print_dump_info (REPORT_COST))
1188 fprintf (vect_dump, "vect_model_load_cost: unsupported access.");
1189
1190 break;
1191 }
1192
1193 default:
1194 gcc_unreachable ();
1195 }
1196 }
1197
1198 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1199 the loop preheader for the vectorized stmt STMT. */
1200
1201 static void
1202 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1203 {
1204 if (gsi)
1205 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1206 else
1207 {
1208 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1210
1211 if (loop_vinfo)
1212 {
1213 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1214 basic_block new_bb;
1215 edge pe;
1216
1217 if (nested_in_vect_loop_p (loop, stmt))
1218 loop = loop->inner;
1219
1220 pe = loop_preheader_edge (loop);
1221 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1222 gcc_assert (!new_bb);
1223 }
1224 else
1225 {
1226 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1227 basic_block bb;
1228 gimple_stmt_iterator gsi_bb_start;
1229
1230 gcc_assert (bb_vinfo);
1231 bb = BB_VINFO_BB (bb_vinfo);
1232 gsi_bb_start = gsi_after_labels (bb);
1233 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1234 }
1235 }
1236
1237 if (vect_print_dump_info (REPORT_DETAILS))
1238 {
1239 fprintf (vect_dump, "created new init_stmt: ");
1240 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1241 }
1242 }
1243
1244 /* Function vect_init_vector.
1245
1246 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1247 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1248    a vector type, a vector with all elements equal to VAL is created first.
1249 Place the initialization at BSI if it is not NULL. Otherwise, place the
1250 initialization at the loop preheader.
1251 Return the DEF of INIT_STMT.
1252 It will be used in the vectorization of STMT. */
1253
1254 tree
1255 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1256 {
1257 tree new_var;
1258 gimple init_stmt;
1259 tree vec_oprnd;
1260 tree new_temp;
1261
1262 if (TREE_CODE (type) == VECTOR_TYPE
1263 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1264 {
1265 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1266 {
1267 if (CONSTANT_CLASS_P (val))
1268 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1269 else
1270 {
1271 new_var = create_tmp_reg (TREE_TYPE (type), NULL);
1272 add_referenced_var (new_var);
1273 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1274 new_var, val,
1275 NULL_TREE);
1276 new_temp = make_ssa_name (new_var, init_stmt);
1277 gimple_assign_set_lhs (init_stmt, new_temp);
1278 vect_init_vector_1 (stmt, init_stmt, gsi);
1279 val = new_temp;
1280 }
1281 }
1282 val = build_vector_from_val (type, val);
1283 }
1284
1285 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1286 add_referenced_var (new_var);
1287 init_stmt = gimple_build_assign (new_var, val);
1288 new_temp = make_ssa_name (new_var, init_stmt);
1289 gimple_assign_set_lhs (init_stmt, new_temp);
1290 vect_init_vector_1 (stmt, init_stmt, gsi);
1291 vec_oprnd = gimple_assign_lhs (init_stmt);
1292 return vec_oprnd;
1293 }
1294
1295
1296 /* Function vect_get_vec_def_for_operand.
1297
1298 OP is an operand in STMT. This function returns a (vector) def that will be
1299 used in the vectorized stmt for STMT.
1300
1301 In the case that OP is an SSA_NAME which is defined in the loop, then
1302 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1303
1304 In case OP is an invariant or constant, a new stmt that creates a vector def
1305 needs to be introduced. */
1306
1307 tree
1308 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1309 {
1310 tree vec_oprnd;
1311 gimple vec_stmt;
1312 gimple def_stmt;
1313 stmt_vec_info def_stmt_info = NULL;
1314 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1315 unsigned int nunits;
1316 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1317 tree def;
1318 enum vect_def_type dt;
1319 bool is_simple_use;
1320 tree vector_type;
1321
1322 if (vect_print_dump_info (REPORT_DETAILS))
1323 {
1324 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1325 print_generic_expr (vect_dump, op, TDF_SLIM);
1326 }
1327
1328 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1329 &def_stmt, &def, &dt);
1330 gcc_assert (is_simple_use);
1331 if (vect_print_dump_info (REPORT_DETAILS))
1332 {
1333 if (def)
1334 {
1335 fprintf (vect_dump, "def = ");
1336 print_generic_expr (vect_dump, def, TDF_SLIM);
1337 }
1338 if (def_stmt)
1339 {
1340 fprintf (vect_dump, " def_stmt = ");
1341 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1342 }
1343 }
1344
1345 switch (dt)
1346 {
1347 /* Case 1: operand is a constant. */
1348 case vect_constant_def:
1349 {
1350 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1351 gcc_assert (vector_type);
1352 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1353
1354 if (scalar_def)
1355 *scalar_def = op;
1356
1357 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1358 if (vect_print_dump_info (REPORT_DETAILS))
1359 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1360
1361 return vect_init_vector (stmt, op, vector_type, NULL);
1362 }
1363
1364 /* Case 2: operand is defined outside the loop - loop invariant. */
1365 case vect_external_def:
1366 {
1367 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1368 gcc_assert (vector_type);
1369
1370 if (scalar_def)
1371 *scalar_def = def;
1372
1373 /* Create 'vec_inv = {inv,inv,..,inv}' */
1374 if (vect_print_dump_info (REPORT_DETAILS))
1375 fprintf (vect_dump, "Create vector_inv.");
1376
1377 return vect_init_vector (stmt, def, vector_type, NULL);
1378 }
1379
1380 /* Case 3: operand is defined inside the loop. */
1381 case vect_internal_def:
1382 {
1383 if (scalar_def)
1384 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1385
1386 /* Get the def from the vectorized stmt. */
1387 def_stmt_info = vinfo_for_stmt (def_stmt);
1388
1389 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390 /* Get vectorized pattern statement. */
1391 if (!vec_stmt
1392 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1393 && !STMT_VINFO_RELEVANT (def_stmt_info))
1394 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1395 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1396 gcc_assert (vec_stmt);
1397 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1398 vec_oprnd = PHI_RESULT (vec_stmt);
1399 else if (is_gimple_call (vec_stmt))
1400 vec_oprnd = gimple_call_lhs (vec_stmt);
1401 else
1402 vec_oprnd = gimple_assign_lhs (vec_stmt);
1403 return vec_oprnd;
1404 }
1405
1406 /* Case 4: operand is defined by a loop header phi - reduction */
1407 case vect_reduction_def:
1408 case vect_double_reduction_def:
1409 case vect_nested_cycle:
1410 {
1411 struct loop *loop;
1412
1413 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1414 loop = (gimple_bb (def_stmt))->loop_father;
1415
1416 /* Get the def before the loop */
1417 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1418 return get_initial_def_for_reduction (stmt, op, scalar_def);
1419 }
1420
1421 /* Case 5: operand is defined by loop-header phi - induction. */
1422 case vect_induction_def:
1423 {
1424 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1425
1426 /* Get the def from the vectorized stmt. */
1427 def_stmt_info = vinfo_for_stmt (def_stmt);
1428 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1429 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1430 vec_oprnd = PHI_RESULT (vec_stmt);
1431 else
1432 vec_oprnd = gimple_get_lhs (vec_stmt);
1433 return vec_oprnd;
1434 }
1435
1436 default:
1437 gcc_unreachable ();
1438 }
1439 }
1440
1441
1442 /* Function vect_get_vec_def_for_stmt_copy
1443
1444 Return a vector-def for an operand. This function is used when the
1445 vectorized stmt to be created (by the caller to this function) is a "copy"
1446 created in case the vectorized result cannot fit in one vector, and several
1447 copies of the vector-stmt are required. In this case the vector-def is
1448 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1449 of the stmt that defines VEC_OPRND.
1450 DT is the type of the vector def VEC_OPRND.
1451
1452 Context:
1453 In case the vectorization factor (VF) is bigger than the number
1454 of elements that can fit in a vectype (nunits), we have to generate
1455 more than one vector stmt to vectorize the scalar stmt. This situation
1456 arises when there are multiple data-types operated upon in the loop; the
1457 smallest data-type determines the VF, and as a result, when vectorizing
1458 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1459 vector stmt (each computing a vector of 'nunits' results, and together
1460 computing 'VF' results in each iteration). This function is called when
1461 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1462 which VF=16 and nunits=4, so the number of copies required is 4):
1463
1464 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1465
1466 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1467 VS1.1: vx.1 = memref1 VS1.2
1468 VS1.2: vx.2 = memref2 VS1.3
1469 VS1.3: vx.3 = memref3
1470
1471 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1472 VSnew.1: vz1 = vx.1 + ... VSnew.2
1473 VSnew.2: vz2 = vx.2 + ... VSnew.3
1474 VSnew.3: vz3 = vx.3 + ...
1475
1476 The vectorization of S1 is explained in vectorizable_load.
1477 The vectorization of S2:
1478 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1479 the function 'vect_get_vec_def_for_operand' is called to
1480 get the relevant vector-def for each operand of S2. For operand x it
1481 returns the vector-def 'vx.0'.
1482
1483 To create the remaining copies of the vector-stmt (VSnew.j), this
1484 function is called to get the relevant vector-def for each operand. It is
1485 obtained from the respective VS1.j stmt, which is recorded in the
1486 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1487
1488 For example, to obtain the vector-def 'vx.1' in order to create the
1489 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1490 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1491 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1492 and return its def ('vx.1').
1493 Overall, to create the above sequence this function will be called 3 times:
1494 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1495 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1496 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1497
1498 tree
1499 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1500 {
1501 gimple vec_stmt_for_operand;
1502 stmt_vec_info def_stmt_info;
1503
1504 /* Do nothing; can reuse same def. */
1505 if (dt == vect_external_def || dt == vect_constant_def )
1506 return vec_oprnd;
1507
1508 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1509 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1510 gcc_assert (def_stmt_info);
1511 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1512 gcc_assert (vec_stmt_for_operand);
1513 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1514 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1515 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1516 else
1517 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1518 return vec_oprnd;
1519 }
1520
1521
1522 /* Get vectorized definitions for the operands to create a copy of an original
1523 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1524
1525 static void
1526 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1527 VEC(tree,heap) **vec_oprnds0,
1528 VEC(tree,heap) **vec_oprnds1)
1529 {
1530 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1531
1532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1533 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1534
1535 if (vec_oprnds1 && *vec_oprnds1)
1536 {
1537 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1538 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1539 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1540 }
1541 }
1542
1543
1544 /* Get vectorized definitions for OP0 and OP1.
1545 REDUC_INDEX is the index of reduction operand in case of reduction,
1546 and -1 otherwise. */
1547
1548 void
1549 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1550 VEC (tree, heap) **vec_oprnds0,
1551 VEC (tree, heap) **vec_oprnds1,
1552 slp_tree slp_node, int reduc_index)
1553 {
1554 if (slp_node)
1555 {
1556 int nops = (op1 == NULL_TREE) ? 1 : 2;
1557 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1558 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1559
1560 VEC_quick_push (tree, ops, op0);
1561 if (op1)
1562 VEC_quick_push (tree, ops, op1);
1563
1564 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1565
1566 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1567 if (op1)
1568 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1569
1570 VEC_free (tree, heap, ops);
1571 VEC_free (slp_void_p, heap, vec_defs);
1572 }
1573 else
1574 {
1575 tree vec_oprnd;
1576
1577 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1578 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1579 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1580
1581 if (op1)
1582 {
1583 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1584 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1585 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1586 }
1587 }
1588 }
1589
1590
1591 /* Function vect_finish_stmt_generation.
1592
1593 Insert a new stmt. */
1594
1595 void
1596 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1597 gimple_stmt_iterator *gsi)
1598 {
1599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1600 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1601 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1602
1603 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1604
1605 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1606
1607 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1608 bb_vinfo));
1609
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 {
1612 fprintf (vect_dump, "add new stmt: ");
1613 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1614 }
1615
1616 gimple_set_location (vec_stmt, gimple_location (stmt));
1617 }
1618
1619 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1620 a function declaration if the target has a vectorized version
1621 of the function, or NULL_TREE if the function cannot be vectorized. */
1622
1623 tree
1624 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1625 {
1626 tree fndecl = gimple_call_fndecl (call);
1627
1628 /* We only handle functions that do not read or clobber memory -- i.e.
1629 const or novops ones. */
1630 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1631 return NULL_TREE;
1632
1633 if (!fndecl
1634 || TREE_CODE (fndecl) != FUNCTION_DECL
1635 || !DECL_BUILT_IN (fndecl))
1636 return NULL_TREE;
1637
1638 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1639 vectype_in);
1640 }
1641
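/* Editorial note (not part of the original file): an example of the kind of
   call vectorizable_function (above) can map to a target builtin.  On a
   target whose builtin_vectorized_function hook provides a vector square
   root, the call below can be replaced by one vector call per vector of
   elements; otherwise vectorizable_function returns NULL_TREE and the call
   is not vectorized this way.  Assumes sqrtf is const here (e.g. with
   -fno-math-errno).  */
#if 0
#include <math.h>

void
example_vectorizable_call (float *out, const float *in, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = sqrtf (in[i]);     /* builtin call with no memory side effects */
}
#endif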
1642 /* Function vectorizable_call.
1643
1644 Check if STMT performs a function call that can be vectorized.
1645 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1646 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1647 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1648
1649 static bool
1650 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1651 slp_tree slp_node)
1652 {
1653 tree vec_dest;
1654 tree scalar_dest;
1655 tree op, type;
1656 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1657 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1658 tree vectype_out, vectype_in;
1659 int nunits_in;
1660 int nunits_out;
1661 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1662 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1663 tree fndecl, new_temp, def, rhs_type;
1664 gimple def_stmt;
1665 enum vect_def_type dt[3]
1666 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1667 gimple new_stmt = NULL;
1668 int ncopies, j;
1669 VEC(tree, heap) *vargs = NULL;
1670 enum { NARROW, NONE, WIDEN } modifier;
1671 size_t i, nargs;
1672 tree lhs;
1673
1674 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1675 return false;
1676
1677 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1678 return false;
1679
1680 /* Is STMT a vectorizable call? */
1681 if (!is_gimple_call (stmt))
1682 return false;
1683
1684 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1685 return false;
1686
1687 if (stmt_can_throw_internal (stmt))
1688 return false;
1689
1690 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1691
1692 /* Process function arguments. */
1693 rhs_type = NULL_TREE;
1694 vectype_in = NULL_TREE;
1695 nargs = gimple_call_num_args (stmt);
1696
1697   /* Bail out if the function has more than three arguments; we do not have
1698      interesting builtin functions to vectorize with more than two arguments,
1699      except for fma.  Calls with no arguments are not handled either.  */
1700 if (nargs == 0 || nargs > 3)
1701 return false;
1702
1703 for (i = 0; i < nargs; i++)
1704 {
1705 tree opvectype;
1706
1707 op = gimple_call_arg (stmt, i);
1708
1709 /* We can only handle calls with arguments of the same type. */
1710 if (rhs_type
1711 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1712 {
1713 if (vect_print_dump_info (REPORT_DETAILS))
1714 fprintf (vect_dump, "argument types differ.");
1715 return false;
1716 }
1717 if (!rhs_type)
1718 rhs_type = TREE_TYPE (op);
1719
1720 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1721 &def_stmt, &def, &dt[i], &opvectype))
1722 {
1723 if (vect_print_dump_info (REPORT_DETAILS))
1724 fprintf (vect_dump, "use not simple.");
1725 return false;
1726 }
1727
1728 if (!vectype_in)
1729 vectype_in = opvectype;
1730 else if (opvectype
1731 && opvectype != vectype_in)
1732 {
1733 if (vect_print_dump_info (REPORT_DETAILS))
1734 fprintf (vect_dump, "argument vector types differ.");
1735 return false;
1736 }
1737 }
1738 /* If all arguments are external or constant defs use a vector type with
1739 the same size as the output vector type. */
1740 if (!vectype_in)
1741 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1742 if (vec_stmt)
1743 gcc_assert (vectype_in);
1744 if (!vectype_in)
1745 {
1746 if (vect_print_dump_info (REPORT_DETAILS))
1747 {
1748 fprintf (vect_dump, "no vectype for scalar type ");
1749 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1750 }
1751
1752 return false;
1753 }
1754
1755 /* FORNOW */
1756 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1757 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1758 if (nunits_in == nunits_out / 2)
1759 modifier = NARROW;
1760 else if (nunits_out == nunits_in)
1761 modifier = NONE;
1762 else if (nunits_out == nunits_in / 2)
1763 modifier = WIDEN;
1764 else
1765 return false;
1766
1767 /* For now, we only vectorize functions if a target specific builtin
1768 is available. TODO -- in some cases, it might be profitable to
1769 insert the calls for pieces of the vector, in order to be able
1770 to vectorize other operations in the loop. */
1771 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1772 if (fndecl == NULL_TREE)
1773 {
1774 if (vect_print_dump_info (REPORT_DETAILS))
1775 fprintf (vect_dump, "function is not vectorizable.");
1776
1777 return false;
1778 }
1779
1780 gcc_assert (!gimple_vuse (stmt));
1781
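/* NCOPIES is the number of vector stmts needed to cover the loop's
vectorization factor; in the SLP case the size of the SLP node already
accounts for it, so a single copy suffices. */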
1782 if (slp_node || PURE_SLP_STMT (stmt_info))
1783 ncopies = 1;
1784 else if (modifier == NARROW)
1785 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1786 else
1787 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1788
1789 /* Sanity check: make sure that at least one copy of the vectorized stmt
1790 needs to be generated. */
1791 gcc_assert (ncopies >= 1);
1792
1793 if (!vec_stmt) /* transformation not required. */
1794 {
1795 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1796 if (vect_print_dump_info (REPORT_DETAILS))
1797 fprintf (vect_dump, "=== vectorizable_call ===");
1798 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1799 return true;
1800 }
1801
1802 /** Transform. **/
1803
1804 if (vect_print_dump_info (REPORT_DETAILS))
1805 fprintf (vect_dump, "transform call.");
1806
1807 /* Handle def. */
1808 scalar_dest = gimple_call_lhs (stmt);
1809 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1810
1811 prev_stmt_info = NULL;
1812 switch (modifier)
1813 {
1814 case NONE:
1815 for (j = 0; j < ncopies; ++j)
1816 {
1817 /* Build argument list for the vectorized call. */
1818 if (j == 0)
1819 vargs = VEC_alloc (tree, heap, nargs);
1820 else
1821 VEC_truncate (tree, vargs, 0);
1822
1823 if (slp_node)
1824 {
1825 VEC (slp_void_p, heap) *vec_defs
1826 = VEC_alloc (slp_void_p, heap, nargs);
1827 VEC (tree, heap) *vec_oprnds0;
1828
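/* Gather the scalar call arguments in VARGS and let vect_get_slp_defs
compute one vector of defs for each argument. */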
1829 for (i = 0; i < nargs; i++)
1830 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1831 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1832 vec_oprnds0
1833 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1834
1835 /* Arguments are ready. Create the new vector stmt. */
1836 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1837 {
1838 size_t k;
1839 for (k = 0; k < nargs; k++)
1840 {
1841 VEC (tree, heap) *vec_oprndsk
1842 = (VEC (tree, heap) *)
1843 VEC_index (slp_void_p, vec_defs, k);
1844 VEC_replace (tree, vargs, k,
1845 VEC_index (tree, vec_oprndsk, i));
1846 }
1847 new_stmt = gimple_build_call_vec (fndecl, vargs);
1848 new_temp = make_ssa_name (vec_dest, new_stmt);
1849 gimple_call_set_lhs (new_stmt, new_temp);
1850 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1851 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1852 new_stmt);
1853 }
1854
1855 for (i = 0; i < nargs; i++)
1856 {
1857 VEC (tree, heap) *vec_oprndsi
1858 = (VEC (tree, heap) *)
1859 VEC_index (slp_void_p, vec_defs, i);
1860 VEC_free (tree, heap, vec_oprndsi);
1861 }
1862 VEC_free (slp_void_p, heap, vec_defs);
1863 continue;
1864 }
1865
1866 for (i = 0; i < nargs; i++)
1867 {
1868 op = gimple_call_arg (stmt, i);
1869 if (j == 0)
1870 vec_oprnd0
1871 = vect_get_vec_def_for_operand (op, stmt, NULL);
1872 else
1873 {
1874 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1875 vec_oprnd0
1876 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1877 }
1878
1879 VEC_quick_push (tree, vargs, vec_oprnd0);
1880 }
1881
1882 new_stmt = gimple_build_call_vec (fndecl, vargs);
1883 new_temp = make_ssa_name (vec_dest, new_stmt);
1884 gimple_call_set_lhs (new_stmt, new_temp);
1885 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1886
1887 if (j == 0)
1888 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1889 else
1890 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1891
1892 prev_stmt_info = vinfo_for_stmt (new_stmt);
1893 }
1894
1895 break;
1896
1897 case NARROW:
1898 for (j = 0; j < ncopies; ++j)
1899 {
1900 /* Build argument list for the vectorized call. */
1901 if (j == 0)
1902 vargs = VEC_alloc (tree, heap, nargs * 2);
1903 else
1904 VEC_truncate (tree, vargs, 0);
1905
1906 if (slp_node)
1907 {
1908 VEC (slp_void_p, heap) *vec_defs
1909 = VEC_alloc (slp_void_p, heap, nargs);
1910 VEC (tree, heap) *vec_oprnds0;
1911
1912 for (i = 0; i < nargs; i++)
1913 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1914 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1915 vec_oprnds0
1916 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1917
1918 /* Arguments are ready. Create the new vector stmt. */
1919 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1920 i += 2)
1921 {
1922 size_t k;
1923 VEC_truncate (tree, vargs, 0);
1924 for (k = 0; k < nargs; k++)
1925 {
1926 VEC (tree, heap) *vec_oprndsk
1927 = (VEC (tree, heap) *)
1928 VEC_index (slp_void_p, vec_defs, k);
1929 VEC_quick_push (tree, vargs,
1930 VEC_index (tree, vec_oprndsk, i));
1931 VEC_quick_push (tree, vargs,
1932 VEC_index (tree, vec_oprndsk, i + 1));
1933 }
1934 new_stmt = gimple_build_call_vec (fndecl, vargs);
1935 new_temp = make_ssa_name (vec_dest, new_stmt);
1936 gimple_call_set_lhs (new_stmt, new_temp);
1937 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1938 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1939 new_stmt);
1940 }
1941
1942 for (i = 0; i < nargs; i++)
1943 {
1944 VEC (tree, heap) *vec_oprndsi
1945 = (VEC (tree, heap) *)
1946 VEC_index (slp_void_p, vec_defs, i);
1947 VEC_free (tree, heap, vec_oprndsi);
1948 }
1949 VEC_free (slp_void_p, heap, vec_defs);
1950 continue;
1951 }
1952
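/* A narrowing call consumes two vector defs per argument for each vector
stmt generated, so fetch a pair of defs for every operand. */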
1953 for (i = 0; i < nargs; i++)
1954 {
1955 op = gimple_call_arg (stmt, i);
1956 if (j == 0)
1957 {
1958 vec_oprnd0
1959 = vect_get_vec_def_for_operand (op, stmt, NULL);
1960 vec_oprnd1
1961 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1962 }
1963 else
1964 {
1965 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1966 vec_oprnd0
1967 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1968 vec_oprnd1
1969 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1970 }
1971
1972 VEC_quick_push (tree, vargs, vec_oprnd0);
1973 VEC_quick_push (tree, vargs, vec_oprnd1);
1974 }
1975
1976 new_stmt = gimple_build_call_vec (fndecl, vargs);
1977 new_temp = make_ssa_name (vec_dest, new_stmt);
1978 gimple_call_set_lhs (new_stmt, new_temp);
1979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1980
1981 if (j == 0)
1982 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1983 else
1984 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1985
1986 prev_stmt_info = vinfo_for_stmt (new_stmt);
1987 }
1988
1989 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1990
1991 break;
1992
1993 case WIDEN:
1994 /* No current target implements this case. */
1995 return false;
1996 }
1997
1998 VEC_free (tree, heap, vargs);
1999
2000 /* Update the exception handling table with the vector stmt if necessary. */
2001 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2002 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2003
2004 /* The call in STMT might prevent it from being removed in dce.
2005 However, we cannot remove it here, due to the way the ssa name
2006 it defines is mapped to the new definition. So just replace the
2007 rhs of the statement with something harmless. */
2008
2009 if (slp_node)
2010 return true;
2011
2012 type = TREE_TYPE (scalar_dest);
2013 if (is_pattern_stmt_p (stmt_info))
2014 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2015 else
2016 lhs = gimple_call_lhs (stmt);
2017 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2018 set_vinfo_for_stmt (new_stmt, stmt_info);
2019 set_vinfo_for_stmt (stmt, NULL);
2020 STMT_VINFO_STMT (stmt_info) = new_stmt;
2021 gsi_replace (gsi, new_stmt, false);
2022 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2023
2024 return true;
2025 }
2026
2027
2028 /* Function vect_gen_widened_results_half
2029
2030 Create a vector stmt whose code, number of arguments, and result
2031 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
2032 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2033 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2034 needs to be created (DECL is a function-decl of a target-builtin).
2035 STMT is the original scalar stmt that we are vectorizing. */
2036
2037 static gimple
2038 vect_gen_widened_results_half (enum tree_code code,
2039 tree decl,
2040 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2041 tree vec_dest, gimple_stmt_iterator *gsi,
2042 gimple stmt)
2043 {
2044 gimple new_stmt;
2045 tree new_temp;
2046
2047 /* Generate half of the widened result: */
2048 if (code == CALL_EXPR)
2049 {
2050 /* Target specific support */
2051 if (op_type == binary_op)
2052 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2053 else
2054 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2055 new_temp = make_ssa_name (vec_dest, new_stmt);
2056 gimple_call_set_lhs (new_stmt, new_temp);
2057 }
2058 else
2059 {
2060 /* Generic support */
2061 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2062 if (op_type != binary_op)
2063 vec_oprnd1 = NULL;
2064 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2065 vec_oprnd1);
2066 new_temp = make_ssa_name (vec_dest, new_stmt);
2067 gimple_assign_set_lhs (new_stmt, new_temp);
2068 }
2069 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2070
2071 return new_stmt;
2072 }
2073
2074
2075 /* Get vectorized definitions for loop-based vectorization. For the first
2076 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2077 scalar operand), and for the rest we get a copy with
2078 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2079 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2080 The vectors are collected into VEC_OPRNDS. */
2081
2082 static void
2083 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2084 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2085 {
2086 tree vec_oprnd;
2087
2088 /* Get first vector operand. */
2089 /* All the vector operands except the very first one (which is the scalar
2090 operand) are stmt copies. */
2091 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2092 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2093 else
2094 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2095
2096 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2097
2098 /* Get second vector operand. */
2099 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2100 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2101
2102 *oprnd = vec_oprnd;
2103
2104 /* For conversion in multiple steps, continue to get operands
2105 recursively. */
2106 if (multi_step_cvt)
2107 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2108 }
2109
2110
2111 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2112 For multi-step conversions store the resulting vectors and call the function
2113 recursively. */
2114
2115 static void
2116 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2117 int multi_step_cvt, gimple stmt,
2118 VEC (tree, heap) *vec_dsts,
2119 gimple_stmt_iterator *gsi,
2120 slp_tree slp_node, enum tree_code code,
2121 stmt_vec_info *prev_stmt_info)
2122 {
2123 unsigned int i;
2124 tree vop0, vop1, new_tmp, vec_dest;
2125 gimple new_stmt;
2126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2127
2128 vec_dest = VEC_pop (tree, vec_dsts);
2129
2130 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2131 {
2132 /* Create demotion operation. */
2133 vop0 = VEC_index (tree, *vec_oprnds, i);
2134 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2135 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2136 new_tmp = make_ssa_name (vec_dest, new_stmt);
2137 gimple_assign_set_lhs (new_stmt, new_tmp);
2138 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2139
2140 if (multi_step_cvt)
2141 /* Store the resulting vector for next recursive call. */
2142 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2143 else
2144 {
2145 /* This is the last step of the conversion sequence. Store the
2146 vectors in SLP_NODE or in vector info of the scalar statement
2147 (or in STMT_VINFO_RELATED_STMT chain). */
2148 if (slp_node)
2149 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2150 else
2151 {
2152 if (!*prev_stmt_info)
2153 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2154 else
2155 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2156
2157 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2158 }
2159 }
2160 }
2161
2162 /* For multi-step demotion operations we first generate demotion operations
2163 from the source type to the intermediate types, and then combine the
2164 results (stored in VEC_OPRNDS) with a demotion operation to the destination
2165 type. */
2166 if (multi_step_cvt)
2167 {
2168 /* At each level of recursion we have half of the operands we had at the
2169 previous level. */
2170 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2171 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2172 stmt, vec_dsts, gsi, slp_node,
2173 VEC_PACK_TRUNC_EXPR,
2174 prev_stmt_info);
2175 }
2176
2177 VEC_quick_push (tree, vec_dsts, vec_dest);
2178 }
2179
2180
2181 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2182 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2183 the resulting vectors and call the function recursively. */
2184
2185 static void
2186 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2187 VEC (tree, heap) **vec_oprnds1,
2188 gimple stmt, tree vec_dest,
2189 gimple_stmt_iterator *gsi,
2190 enum tree_code code1,
2191 enum tree_code code2, tree decl1,
2192 tree decl2, int op_type)
2193 {
2194 int i;
2195 tree vop0, vop1, new_tmp1, new_tmp2;
2196 gimple new_stmt1, new_stmt2;
2197 VEC (tree, heap) *vec_tmp = NULL;
2198
2199 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2200 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2201 {
2202 if (op_type == binary_op)
2203 vop1 = VEC_index (tree, *vec_oprnds1, i);
2204 else
2205 vop1 = NULL_TREE;
2206
2207 /* Generate the two halves of promotion operation. */
2208 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2209 op_type, vec_dest, gsi, stmt);
2210 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2211 op_type, vec_dest, gsi, stmt);
2212 if (is_gimple_call (new_stmt1))
2213 {
2214 new_tmp1 = gimple_call_lhs (new_stmt1);
2215 new_tmp2 = gimple_call_lhs (new_stmt2);
2216 }
2217 else
2218 {
2219 new_tmp1 = gimple_assign_lhs (new_stmt1);
2220 new_tmp2 = gimple_assign_lhs (new_stmt2);
2221 }
2222
2223 /* Store the results for the next step. */
2224 VEC_quick_push (tree, vec_tmp, new_tmp1);
2225 VEC_quick_push (tree, vec_tmp, new_tmp2);
2226 }
2227
2228 VEC_free (tree, heap, *vec_oprnds0);
2229 *vec_oprnds0 = vec_tmp;
2230 }
2231
2232
2233 /* Check if STMT performs a conversion operation that can be vectorized.
2234 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2235 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2236 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2237
2238 static bool
2239 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2240 gimple *vec_stmt, slp_tree slp_node)
2241 {
2242 tree vec_dest;
2243 tree scalar_dest;
2244 tree op0, op1 = NULL_TREE;
2245 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2247 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2248 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2249 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2250 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2251 tree new_temp;
2252 tree def;
2253 gimple def_stmt;
2254 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2255 gimple new_stmt = NULL;
2256 stmt_vec_info prev_stmt_info;
2257 int nunits_in;
2258 int nunits_out;
2259 tree vectype_out, vectype_in;
2260 int ncopies, i, j;
2261 tree lhs_type, rhs_type;
2262 enum { NARROW, NONE, WIDEN } modifier;
2263 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2264 tree vop0;
2265 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2266 int multi_step_cvt = 0;
2267 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2268 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2269 int op_type;
2270 enum machine_mode rhs_mode;
2271 unsigned short fltsz;
2272
2273 /* Is STMT a vectorizable conversion? */
2274
2275 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2276 return false;
2277
2278 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2279 return false;
2280
2281 if (!is_gimple_assign (stmt))
2282 return false;
2283
2284 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2285 return false;
2286
2287 code = gimple_assign_rhs_code (stmt);
2288 if (!CONVERT_EXPR_CODE_P (code)
2289 && code != FIX_TRUNC_EXPR
2290 && code != FLOAT_EXPR
2291 && code != WIDEN_MULT_EXPR
2292 && code != WIDEN_LSHIFT_EXPR)
2293 return false;
2294
2295 op_type = TREE_CODE_LENGTH (code);
2296
2297 /* Check types of lhs and rhs. */
2298 scalar_dest = gimple_assign_lhs (stmt);
2299 lhs_type = TREE_TYPE (scalar_dest);
2300 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2301
2302 op0 = gimple_assign_rhs1 (stmt);
2303 rhs_type = TREE_TYPE (op0);
2304
2305 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2306 && !((INTEGRAL_TYPE_P (lhs_type)
2307 && INTEGRAL_TYPE_P (rhs_type))
2308 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2309 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2310 return false;
2311
2312 if ((INTEGRAL_TYPE_P (lhs_type)
2313 && (TYPE_PRECISION (lhs_type)
2314 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2315 || (INTEGRAL_TYPE_P (rhs_type)
2316 && (TYPE_PRECISION (rhs_type)
2317 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2318 {
2319 if (vect_print_dump_info (REPORT_DETAILS))
2320 fprintf (vect_dump,
2321 "type conversion to/from bit-precision unsupported.");
2322 return false;
2323 }
2324
2325 /* Check the operands of the operation. */
2326 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2327 &def_stmt, &def, &dt[0], &vectype_in))
2328 {
2329 if (vect_print_dump_info (REPORT_DETAILS))
2330 fprintf (vect_dump, "use not simple.");
2331 return false;
2332 }
2333 if (op_type == binary_op)
2334 {
2335 bool ok;
2336
2337 op1 = gimple_assign_rhs2 (stmt);
2338 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2339 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2340 OP1. */
2341 if (CONSTANT_CLASS_P (op0))
2342 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2343 &def_stmt, &def, &dt[1], &vectype_in);
2344 else
2345 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2346 &def, &dt[1]);
2347
2348 if (!ok)
2349 {
2350 if (vect_print_dump_info (REPORT_DETAILS))
2351 fprintf (vect_dump, "use not simple.");
2352 return false;
2353 }
2354 }
2355
2356 /* If op0 is an external or constant def, use a vector type of
2357 the same size as the output vector type. */
2358 if (!vectype_in)
2359 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2360 if (vec_stmt)
2361 gcc_assert (vectype_in);
2362 if (!vectype_in)
2363 {
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 {
2366 fprintf (vect_dump, "no vectype for scalar type ");
2367 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2368 }
2369
2370 return false;
2371 }
2372
2373 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2374 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2375 if (nunits_in < nunits_out)
2376 modifier = NARROW;
2377 else if (nunits_out == nunits_in)
2378 modifier = NONE;
2379 else
2380 modifier = WIDEN;
2381
2382 /* Multiple types in SLP are handled by creating the appropriate number of
2383 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2384 case of SLP. */
2385 if (slp_node || PURE_SLP_STMT (stmt_info))
2386 ncopies = 1;
2387 else if (modifier == NARROW)
2388 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2389 else
2390 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2391
2392 /* Sanity check: make sure that at least one copy of the vectorized stmt
2393 needs to be generated. */
2394 gcc_assert (ncopies >= 1);
2395
2396 /* Supportable by target? */
2397 switch (modifier)
2398 {
2399 case NONE:
2400 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2401 return false;
2402 if (supportable_convert_operation (code, vectype_out, vectype_in,
2403 &decl1, &code1))
2404 break;
2405 /* FALLTHRU */
2406 unsupported:
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "conversion not supported by target.");
2409 return false;
2410
2411 case WIDEN:
2412 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2413 &decl1, &decl2, &code1, &code2,
2414 &multi_step_cvt, &interm_types))
2415 {
2416 /* Binary widening operation can only be supported directly by the
2417 architecture. */
2418 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2419 break;
2420 }
2421
2422 if (code != FLOAT_EXPR
2423 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2424 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2425 goto unsupported;
2426
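/* Try a multi-step conversion: widen the integer source through one or
more intermediate integer types and convert the widest intermediate type
to the floating-point destination. */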
2427 rhs_mode = TYPE_MODE (rhs_type);
2428 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2429 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2430 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2431 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2432 {
2433 cvt_type
2434 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2435 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2436 if (cvt_type == NULL_TREE)
2437 goto unsupported;
2438
2439 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2440 {
2441 if (!supportable_convert_operation (code, vectype_out,
2442 cvt_type, &decl1, &codecvt1))
2443 goto unsupported;
2444 }
2445 else if (!supportable_widening_operation (code, stmt, vectype_out,
2446 cvt_type, &decl1, &decl2,
2447 &codecvt1, &codecvt2,
2448 &multi_step_cvt,
2449 &interm_types))
2450 continue;
2451 else
2452 gcc_assert (multi_step_cvt == 0);
2453
2454 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2455 vectype_in, NULL, NULL, &code1,
2456 &code2, &multi_step_cvt,
2457 &interm_types))
2458 break;
2459 }
2460
2461 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2462 goto unsupported;
2463
2464 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2465 codecvt2 = ERROR_MARK;
2466 else
2467 {
2468 multi_step_cvt++;
2469 VEC_safe_push (tree, heap, interm_types, cvt_type);
2470 cvt_type = NULL_TREE;
2471 }
2472 break;
2473
2474 case NARROW:
2475 gcc_assert (op_type == unary_op);
2476 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2477 &code1, &multi_step_cvt,
2478 &interm_types))
2479 break;
2480
2481 if (code != FIX_TRUNC_EXPR
2482 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2483 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2484 goto unsupported;
2485
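/* Otherwise convert the floating-point source to an integer type of the
same width first, and then narrow that integer result down to the
destination type. */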
2486 rhs_mode = TYPE_MODE (rhs_type);
2487 cvt_type
2488 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2489 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2490 if (cvt_type == NULL_TREE)
2491 goto unsupported;
2492 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2493 &decl1, &codecvt1))
2494 goto unsupported;
2495 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2496 &code1, &multi_step_cvt,
2497 &interm_types))
2498 break;
2499 goto unsupported;
2500
2501 default:
2502 gcc_unreachable ();
2503 }
2504
2505 if (!vec_stmt) /* transformation not required. */
2506 {
2507 if (vect_print_dump_info (REPORT_DETAILS))
2508 fprintf (vect_dump, "=== vectorizable_conversion ===");
2509 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2510 {
2511 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2512 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2513 }
2514 else if (modifier == NARROW)
2515 {
2516 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2517 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2518 }
2519 else
2520 {
2521 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2522 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2523 }
2524 VEC_free (tree, heap, interm_types);
2525 return true;
2526 }
2527
2528 /** Transform. **/
2529 if (vect_print_dump_info (REPORT_DETAILS))
2530 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2531
2532 if (op_type == binary_op)
2533 {
2534 if (CONSTANT_CLASS_P (op0))
2535 op0 = fold_convert (TREE_TYPE (op1), op0);
2536 else if (CONSTANT_CLASS_P (op1))
2537 op1 = fold_convert (TREE_TYPE (op0), op1);
2538 }
2539
2540 /* In case of multi-step conversion, we first generate conversion operations
2541 to the intermediate types, and then from those types to the final one.
2542 We create vector destinations for the intermediate type (TYPES) received
2543 from supportable_*_operation, and store them in the correct order
2544 for future use in vect_create_vectorized_*_stmts (). */
2545 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2546 vec_dest = vect_create_destination_var (scalar_dest,
2547 (cvt_type && modifier == WIDEN)
2548 ? cvt_type : vectype_out);
2549 VEC_quick_push (tree, vec_dsts, vec_dest);
2550
2551 if (multi_step_cvt)
2552 {
2553 for (i = VEC_length (tree, interm_types) - 1;
2554 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2555 {
2556 vec_dest = vect_create_destination_var (scalar_dest,
2557 intermediate_type);
2558 VEC_quick_push (tree, vec_dsts, vec_dest);
2559 }
2560 }
2561
2562 if (cvt_type)
2563 vec_dest = vect_create_destination_var (scalar_dest,
2564 modifier == WIDEN
2565 ? vectype_out : cvt_type);
2566
2567 if (!slp_node)
2568 {
2569 if (modifier == NONE)
2570 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2571 else if (modifier == WIDEN)
2572 {
2573 vec_oprnds0 = VEC_alloc (tree, heap,
2574 (multi_step_cvt
2575 ? vect_pow2 (multi_step_cvt) : 1));
2576 if (op_type == binary_op)
2577 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2578 }
2579 else
2580 vec_oprnds0 = VEC_alloc (tree, heap,
2581 2 * (multi_step_cvt
2582 ? vect_pow2 (multi_step_cvt) : 1));
2583 }
2584 else if (code == WIDEN_LSHIFT_EXPR)
2585 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2586
2587 last_oprnd = op0;
2588 prev_stmt_info = NULL;
2589 switch (modifier)
2590 {
2591 case NONE:
2592 for (j = 0; j < ncopies; j++)
2593 {
2594 if (j == 0)
2595 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2596 -1);
2597 else
2598 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2599
2600 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2601 {
2602 /* Arguments are ready. Create the new vector stmt. */
2603 if (code1 == CALL_EXPR)
2604 {
2605 new_stmt = gimple_build_call (decl1, 1, vop0);
2606 new_temp = make_ssa_name (vec_dest, new_stmt);
2607 gimple_call_set_lhs (new_stmt, new_temp);
2608 }
2609 else
2610 {
2611 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2612 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2613 vop0, NULL);
2614 new_temp = make_ssa_name (vec_dest, new_stmt);
2615 gimple_assign_set_lhs (new_stmt, new_temp);
2616 }
2617
2618 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2619 if (slp_node)
2620 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2621 new_stmt);
2622 }
2623
2624 if (j == 0)
2625 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2626 else
2627 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2628 prev_stmt_info = vinfo_for_stmt (new_stmt);
2629 }
2630 break;
2631
2632 case WIDEN:
2633 /* In case the vectorization factor (VF) is bigger than the number
2634 of elements that we can fit in a vectype (nunits), we have to
2635 generate more than one vector stmt - i.e., we need to "unroll"
2636 the vector stmt by a factor VF/nunits. */
2637 for (j = 0; j < ncopies; j++)
2638 {
2639 /* Handle uses. */
2640 if (j == 0)
2641 {
2642 if (slp_node)
2643 {
2644 if (code == WIDEN_LSHIFT_EXPR)
2645 {
2646 unsigned int k;
2647
2648 vec_oprnd1 = op1;
2649 /* Store vec_oprnd1 for every vector stmt to be created
2650 for SLP_NODE. We check during the analysis that all
2651 the shift arguments are the same. */
2652 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2653 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2654
2655 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2656 slp_node, -1);
2657 }
2658 else
2659 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2660 &vec_oprnds1, slp_node, -1);
2661 }
2662 else
2663 {
2664 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2665 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2666 if (op_type == binary_op)
2667 {
2668 if (code == WIDEN_LSHIFT_EXPR)
2669 vec_oprnd1 = op1;
2670 else
2671 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2672 NULL);
2673 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2674 }
2675 }
2676 }
2677 else
2678 {
2679 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2680 VEC_truncate (tree, vec_oprnds0, 0);
2681 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2682 if (op_type == binary_op)
2683 {
2684 if (code == WIDEN_LSHIFT_EXPR)
2685 vec_oprnd1 = op1;
2686 else
2687 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2688 vec_oprnd1);
2689 VEC_truncate (tree, vec_oprnds1, 0);
2690 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2691 }
2692 }
2693
2694 /* Arguments are ready. Create the new vector stmts. */
2695 for (i = multi_step_cvt; i >= 0; i--)
2696 {
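/* On the last step switch to the conversion codes CODECVT1/CODECVT2 when
the conversion to the destination type itself has to be performed as a
widening operation. */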
2697 tree this_dest = VEC_index (tree, vec_dsts, i);
2698 enum tree_code c1 = code1, c2 = code2;
2699 if (i == 0 && codecvt2 != ERROR_MARK)
2700 {
2701 c1 = codecvt1;
2702 c2 = codecvt2;
2703 }
2704 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2705 &vec_oprnds1,
2706 stmt, this_dest, gsi,
2707 c1, c2, decl1, decl2,
2708 op_type);
2709 }
2710
2711 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2712 {
2713 if (cvt_type)
2714 {
2715 if (codecvt1 == CALL_EXPR)
2716 {
2717 new_stmt = gimple_build_call (decl1, 1, vop0);
2718 new_temp = make_ssa_name (vec_dest, new_stmt);
2719 gimple_call_set_lhs (new_stmt, new_temp);
2720 }
2721 else
2722 {
2723 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2724 new_temp = make_ssa_name (vec_dest, NULL);
2725 new_stmt = gimple_build_assign_with_ops (codecvt1,
2726 new_temp,
2727 vop0, NULL);
2728 }
2729
2730 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2731 }
2732 else
2733 new_stmt = SSA_NAME_DEF_STMT (vop0);
2734
2735 if (slp_node)
2736 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2737 new_stmt);
2738 else
2739 {
2740 if (!prev_stmt_info)
2741 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2742 else
2743 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2744 prev_stmt_info = vinfo_for_stmt (new_stmt);
2745 }
2746 }
2747 }
2748
2749 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2750 break;
2751
2752 case NARROW:
2753 /* In case the vectorization factor (VF) is bigger than the number
2754 of elements that we can fit in a vectype (nunits), we have to
2755 generate more than one vector stmt - i.e., we need to "unroll"
2756 the vector stmt by a factor VF/nunits. */
2757 for (j = 0; j < ncopies; j++)
2758 {
2759 /* Handle uses. */
2760 if (slp_node)
2761 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2762 slp_node, -1);
2763 else
2764 {
2765 VEC_truncate (tree, vec_oprnds0, 0);
2766 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2767 vect_pow2 (multi_step_cvt) - 1);
2768 }
2769
2770 /* Arguments are ready. Create the new vector stmts. */
2771 if (cvt_type)
2772 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2773 {
2774 if (codecvt1 == CALL_EXPR)
2775 {
2776 new_stmt = gimple_build_call (decl1, 1, vop0);
2777 new_temp = make_ssa_name (vec_dest, new_stmt);
2778 gimple_call_set_lhs (new_stmt, new_temp);
2779 }
2780 else
2781 {
2782 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2783 new_temp = make_ssa_name (vec_dest, NULL);
2784 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2785 vop0, NULL);
2786 }
2787
2788 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2789 VEC_replace (tree, vec_oprnds0, i, new_temp);
2790 }
2791
2792 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2793 stmt, vec_dsts, gsi,
2794 slp_node, code1,
2795 &prev_stmt_info);
2796 }
2797
2798 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2799 break;
2800 }
2801
2802 VEC_free (tree, heap, vec_oprnds0);
2803 VEC_free (tree, heap, vec_oprnds1);
2804 VEC_free (tree, heap, vec_dsts);
2805 VEC_free (tree, heap, interm_types);
2806
2807 return true;
2808 }
2809
2810
2811 /* Function vectorizable_assignment.
2812
2813 Check if STMT performs an assignment (copy) that can be vectorized.
2814 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2815 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2816 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2817
2818 static bool
2819 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2820 gimple *vec_stmt, slp_tree slp_node)
2821 {
2822 tree vec_dest;
2823 tree scalar_dest;
2824 tree op;
2825 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2827 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2828 tree new_temp;
2829 tree def;
2830 gimple def_stmt;
2831 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2832 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2833 int ncopies;
2834 int i, j;
2835 VEC(tree,heap) *vec_oprnds = NULL;
2836 tree vop;
2837 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2838 gimple new_stmt = NULL;
2839 stmt_vec_info prev_stmt_info = NULL;
2840 enum tree_code code;
2841 tree vectype_in;
2842
2843 /* Multiple types in SLP are handled by creating the appropriate number of
2844 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2845 case of SLP. */
2846 if (slp_node || PURE_SLP_STMT (stmt_info))
2847 ncopies = 1;
2848 else
2849 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2850
2851 gcc_assert (ncopies >= 1);
2852
2853 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2854 return false;
2855
2856 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2857 return false;
2858
2859 /* Is vectorizable assignment? */
2860 if (!is_gimple_assign (stmt))
2861 return false;
2862
2863 scalar_dest = gimple_assign_lhs (stmt);
2864 if (TREE_CODE (scalar_dest) != SSA_NAME)
2865 return false;
2866
2867 code = gimple_assign_rhs_code (stmt);
2868 if (gimple_assign_single_p (stmt)
2869 || code == PAREN_EXPR
2870 || CONVERT_EXPR_CODE_P (code))
2871 op = gimple_assign_rhs1 (stmt);
2872 else
2873 return false;
2874
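/* For a VIEW_CONVERT_EXPR look through the conversion to the underlying
operand. */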
2875 if (code == VIEW_CONVERT_EXPR)
2876 op = TREE_OPERAND (op, 0);
2877
2878 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2879 &def_stmt, &def, &dt[0], &vectype_in))
2880 {
2881 if (vect_print_dump_info (REPORT_DETAILS))
2882 fprintf (vect_dump, "use not simple.");
2883 return false;
2884 }
2885
2886 /* We can handle NOP_EXPR conversions that do not change the number
2887 of elements or the vector size. */
2888 if ((CONVERT_EXPR_CODE_P (code)
2889 || code == VIEW_CONVERT_EXPR)
2890 && (!vectype_in
2891 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2892 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2893 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2894 return false;
2895
2896 /* We do not handle bit-precision changes. */
2897 if ((CONVERT_EXPR_CODE_P (code)
2898 || code == VIEW_CONVERT_EXPR)
2899 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2900 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2901 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2902 || ((TYPE_PRECISION (TREE_TYPE (op))
2903 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2904 /* But a conversion that does not change the bit-pattern is ok. */
2905 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2906 > TYPE_PRECISION (TREE_TYPE (op)))
2907 && TYPE_UNSIGNED (TREE_TYPE (op))))
2908 {
2909 if (vect_print_dump_info (REPORT_DETAILS))
2910 fprintf (vect_dump, "type conversion to/from bit-precision "
2911 "unsupported.");
2912 return false;
2913 }
2914
2915 if (!vec_stmt) /* transformation not required. */
2916 {
2917 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2918 if (vect_print_dump_info (REPORT_DETAILS))
2919 fprintf (vect_dump, "=== vectorizable_assignment ===");
2920 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2921 return true;
2922 }
2923
2924 /** Transform. **/
2925 if (vect_print_dump_info (REPORT_DETAILS))
2926 fprintf (vect_dump, "transform assignment.");
2927
2928 /* Handle def. */
2929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2930
2931 /* Handle use. */
2932 for (j = 0; j < ncopies; j++)
2933 {
2934 /* Handle uses. */
2935 if (j == 0)
2936 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2937 else
2938 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2939
2940 /* Arguments are ready. Create the new vector stmt. */
2941 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2942 {
2943 if (CONVERT_EXPR_CODE_P (code)
2944 || code == VIEW_CONVERT_EXPR)
2945 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2946 new_stmt = gimple_build_assign (vec_dest, vop);
2947 new_temp = make_ssa_name (vec_dest, new_stmt);
2948 gimple_assign_set_lhs (new_stmt, new_temp);
2949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2950 if (slp_node)
2951 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2952 }
2953
2954 if (slp_node)
2955 continue;
2956
2957 if (j == 0)
2958 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2959 else
2960 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2961
2962 prev_stmt_info = vinfo_for_stmt (new_stmt);
2963 }
2964
2965 VEC_free (tree, heap, vec_oprnds);
2966 return true;
2967 }
2968
2969
2970 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2971 either as shift by a scalar or by a vector. */
2972
2973 bool
2974 vect_supportable_shift (enum tree_code code, tree scalar_type)
2975 {
2976
2977 enum machine_mode vec_mode;
2978 optab optab;
2979 int icode;
2980 tree vectype;
2981
2982 vectype = get_vectype_for_scalar_type (scalar_type);
2983 if (!vectype)
2984 return false;
2985
2986 optab = optab_for_tree_code (code, vectype, optab_scalar);
2987 if (!optab
2988 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2989 {
2990 optab = optab_for_tree_code (code, vectype, optab_vector);
2991 if (!optab
2992 || (optab_handler (optab, TYPE_MODE (vectype))
2993 == CODE_FOR_nothing))
2994 return false;
2995 }
2996
2997 vec_mode = TYPE_MODE (vectype);
2998 icode = (int) optab_handler (optab, vec_mode);
2999 if (icode == CODE_FOR_nothing)
3000 return false;
3001
3002 return true;
3003 }
3004
3005
3006 /* Function vectorizable_shift.
3007
3008 Check if STMT performs a shift operation that can be vectorized.
3009 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3010 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3011 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3012
3013 static bool
3014 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3015 gimple *vec_stmt, slp_tree slp_node)
3016 {
3017 tree vec_dest;
3018 tree scalar_dest;
3019 tree op0, op1 = NULL;
3020 tree vec_oprnd1 = NULL_TREE;
3021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3022 tree vectype;
3023 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3024 enum tree_code code;
3025 enum machine_mode vec_mode;
3026 tree new_temp;
3027 optab optab;
3028 int icode;
3029 enum machine_mode optab_op2_mode;
3030 tree def;
3031 gimple def_stmt;
3032 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3033 gimple new_stmt = NULL;
3034 stmt_vec_info prev_stmt_info;
3035 int nunits_in;
3036 int nunits_out;
3037 tree vectype_out;
3038 tree op1_vectype;
3039 int ncopies;
3040 int j, i;
3041 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3042 tree vop0, vop1;
3043 unsigned int k;
3044 bool scalar_shift_arg = true;
3045 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3046 int vf;
3047
3048 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3049 return false;
3050
3051 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3052 return false;
3053
3054 /* Is STMT a vectorizable binary/unary operation? */
3055 if (!is_gimple_assign (stmt))
3056 return false;
3057
3058 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3059 return false;
3060
3061 code = gimple_assign_rhs_code (stmt);
3062
3063 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3064 || code == RROTATE_EXPR))
3065 return false;
3066
3067 scalar_dest = gimple_assign_lhs (stmt);
3068 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3069 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3070 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3071 {
3072 if (vect_print_dump_info (REPORT_DETAILS))
3073 fprintf (vect_dump, "bit-precision shifts not supported.");
3074 return false;
3075 }
3076
3077 op0 = gimple_assign_rhs1 (stmt);
3078 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3079 &def_stmt, &def, &dt[0], &vectype))
3080 {
3081 if (vect_print_dump_info (REPORT_DETAILS))
3082 fprintf (vect_dump, "use not simple.");
3083 return false;
3084 }
3085 /* If op0 is an external or constant def use a vector type with
3086 the same size as the output vector type. */
3087 if (!vectype)
3088 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3089 if (vec_stmt)
3090 gcc_assert (vectype);
3091 if (!vectype)
3092 {
3093 if (vect_print_dump_info (REPORT_DETAILS))
3094 {
3095 fprintf (vect_dump, "no vectype for scalar type ");
3096 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3097 }
3098
3099 return false;
3100 }
3101
3102 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3103 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3104 if (nunits_out != nunits_in)
3105 return false;
3106
3107 op1 = gimple_assign_rhs2 (stmt);
3108 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3109 &def, &dt[1], &op1_vectype))
3110 {
3111 if (vect_print_dump_info (REPORT_DETAILS))
3112 fprintf (vect_dump, "use not simple.");
3113 return false;
3114 }
3115
3116 if (loop_vinfo)
3117 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3118 else
3119 vf = 1;
3120
3121 /* Multiple types in SLP are handled by creating the appropriate number of
3122 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3123 case of SLP. */
3124 if (slp_node || PURE_SLP_STMT (stmt_info))
3125 ncopies = 1;
3126 else
3127 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3128
3129 gcc_assert (ncopies >= 1);
3130
3131 /* Determine whether the shift amount is a vector or a scalar. If the
3132 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3133
3134 if (dt[1] == vect_internal_def && !slp_node)
3135 scalar_shift_arg = false;
3136 else if (dt[1] == vect_constant_def
3137 || dt[1] == vect_external_def
3138 || dt[1] == vect_internal_def)
3139 {
3140 /* In SLP, we need to check whether the shift count is the same in
3141 all the statements; in loops, if it is a constant or invariant, it
3142 is always a scalar shift. */
3143 if (slp_node)
3144 {
3145 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3146 gimple slpstmt;
3147
3148 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3149 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3150 scalar_shift_arg = false;
3151 }
3152 }
3153 else
3154 {
3155 if (vect_print_dump_info (REPORT_DETAILS))
3156 fprintf (vect_dump, "operand mode requires invariant argument.");
3157 return false;
3158 }
3159
3160 /* Vector shifted by vector. */
3161 if (!scalar_shift_arg)
3162 {
3163 optab = optab_for_tree_code (code, vectype, optab_vector);
3164 if (vect_print_dump_info (REPORT_DETAILS))
3165 fprintf (vect_dump, "vector/vector shift/rotate found.");
3166 if (!op1_vectype)
3167 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3168 if (op1_vectype == NULL_TREE
3169 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3170 {
3171 if (vect_print_dump_info (REPORT_DETAILS))
3172 fprintf (vect_dump, "unusable type for last operand in"
3173 " vector/vector shift/rotate.");
3174 return false;
3175 }
3176 }
3177 /* See if the machine has a vector shifted by scalar insn and, if not,
3178 see if it has a vector shifted by vector insn. */
3179 else
3180 {
3181 optab = optab_for_tree_code (code, vectype, optab_scalar);
3182 if (optab
3183 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3184 {
3185 if (vect_print_dump_info (REPORT_DETAILS))
3186 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3187 }
3188 else
3189 {
3190 optab = optab_for_tree_code (code, vectype, optab_vector);
3191 if (optab
3192 && (optab_handler (optab, TYPE_MODE (vectype))
3193 != CODE_FOR_nothing))
3194 {
3195 scalar_shift_arg = false;
3196
3197 if (vect_print_dump_info (REPORT_DETAILS))
3198 fprintf (vect_dump, "vector/vector shift/rotate found.");
3199
3200 /* Unlike the other binary operators, shifts/rotates have
3201 the rhs being int, instead of the same type as the lhs,
3202 so make sure the scalar is the right type if we are
3203 dealing with vectors of long long/long/short/char. */
3204 if (dt[1] == vect_constant_def)
3205 op1 = fold_convert (TREE_TYPE (vectype), op1);
3206 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3207 TREE_TYPE (op1)))
3208 {
3209 if (slp_node
3210 && TYPE_MODE (TREE_TYPE (vectype))
3211 != TYPE_MODE (TREE_TYPE (op1)))
3212 {
3213 if (vect_print_dump_info (REPORT_DETAILS))
3214 fprintf (vect_dump, "unusable type for last operand in"
3215 " vector/vector shift/rotate.");
3216 return false;
3217 }
3218 if (vec_stmt && !slp_node)
3219 {
3220 op1 = fold_convert (TREE_TYPE (vectype), op1);
3221 op1 = vect_init_vector (stmt, op1,
3222 TREE_TYPE (vectype), NULL);
3223 }
3224 }
3225 }
3226 }
3227 }
3228
3229 /* Supportable by target? */
3230 if (!optab)
3231 {
3232 if (vect_print_dump_info (REPORT_DETAILS))
3233 fprintf (vect_dump, "no optab.");
3234 return false;
3235 }
3236 vec_mode = TYPE_MODE (vectype);
3237 icode = (int) optab_handler (optab, vec_mode);
3238 if (icode == CODE_FOR_nothing)
3239 {
3240 if (vect_print_dump_info (REPORT_DETAILS))
3241 fprintf (vect_dump, "op not supported by target.");
3242 /* Check only during analysis. */
3243 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3244 || (vf < vect_min_worthwhile_factor (code)
3245 && !vec_stmt))
3246 return false;
3247 if (vect_print_dump_info (REPORT_DETAILS))
3248 fprintf (vect_dump, "proceeding using word mode.");
3249 }
3250
3251 /* Worthwhile without SIMD support? Check only during analysis. */
3252 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3253 && vf < vect_min_worthwhile_factor (code)
3254 && !vec_stmt)
3255 {
3256 if (vect_print_dump_info (REPORT_DETAILS))
3257 fprintf (vect_dump, "not worthwhile without SIMD support.");
3258 return false;
3259 }
3260
3261 if (!vec_stmt) /* transformation not required. */
3262 {
3263 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3264 if (vect_print_dump_info (REPORT_DETAILS))
3265 fprintf (vect_dump, "=== vectorizable_shift ===");
3266 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3267 return true;
3268 }
3269
3270 /** Transform. **/
3271
3272 if (vect_print_dump_info (REPORT_DETAILS))
3273 fprintf (vect_dump, "transform binary/unary operation.");
3274
3275 /* Handle def. */
3276 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3277
3278 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3279 created in the previous stages of the recursion, so no allocation is
3280 needed, except for the case of shift with scalar shift argument. In that
3281 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3282 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3283 In case of loop-based vectorization we allocate VECs of size 1. We
3284 allocate VEC_OPRNDS1 only in case of binary operation. */
3285 if (!slp_node)
3286 {
3287 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3288 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3289 }
3290 else if (scalar_shift_arg)
3291 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3292
3293 prev_stmt_info = NULL;
3294 for (j = 0; j < ncopies; j++)
3295 {
3296 /* Handle uses. */
3297 if (j == 0)
3298 {
3299 if (scalar_shift_arg)
3300 {
3301 /* Vector shl and shr insn patterns can be defined with scalar
3302 operand 2 (shift operand). In this case, use constant or loop
3303 invariant op1 directly, without extending it to vector mode
3304 first. */
3305 optab_op2_mode = insn_data[icode].operand[2].mode;
3306 if (!VECTOR_MODE_P (optab_op2_mode))
3307 {
3308 if (vect_print_dump_info (REPORT_DETAILS))
3309 fprintf (vect_dump, "operand 1 using scalar mode.");
3310 vec_oprnd1 = op1;
3311 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3312 if (slp_node)
3313 {
3314 /* Store vec_oprnd1 for every vector stmt to be created
3315 for SLP_NODE. We check during the analysis that all
3316 the shift arguments are the same.
3317 TODO: Allow different constants for different vector
3318 stmts generated for an SLP instance. */
3319 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3320 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3321 }
3322 }
3323 }
3324
3325 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3326 (a special case for certain kinds of vector shifts); otherwise,
3327 operand 1 should be of a vector type (the usual case). */
3328 if (vec_oprnd1)
3329 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3330 slp_node, -1);
3331 else
3332 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3333 slp_node, -1);
3334 }
3335 else
3336 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3337
3338 /* Arguments are ready. Create the new vector stmt. */
3339 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3340 {
3341 vop1 = VEC_index (tree, vec_oprnds1, i);
3342 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3343 new_temp = make_ssa_name (vec_dest, new_stmt);
3344 gimple_assign_set_lhs (new_stmt, new_temp);
3345 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3346 if (slp_node)
3347 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3348 }
3349
3350 if (slp_node)
3351 continue;
3352
3353 if (j == 0)
3354 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3355 else
3356 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3357 prev_stmt_info = vinfo_for_stmt (new_stmt);
3358 }
3359
3360 VEC_free (tree, heap, vec_oprnds0);
3361 VEC_free (tree, heap, vec_oprnds1);
3362
3363 return true;
3364 }
3365
3366
3367 static tree permute_vec_elements (tree, tree, tree, gimple,
3368 gimple_stmt_iterator *);
3369
3370
3371 /* Function vectorizable_operation.
3372
3373 Check if STMT performs a binary, unary or ternary operation that can
3374 be vectorized.
3375 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3376 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3377 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3378
3379 static bool
3380 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3381 gimple *vec_stmt, slp_tree slp_node)
3382 {
3383 tree vec_dest;
3384 tree scalar_dest;
3385 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3386 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3387 tree vectype;
3388 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3389 enum tree_code code;
3390 enum machine_mode vec_mode;
3391 tree new_temp;
3392 int op_type;
3393 optab optab;
3394 int icode;
3395 tree def;
3396 gimple def_stmt;
3397 enum vect_def_type dt[3]
3398 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3399 gimple new_stmt = NULL;
3400 stmt_vec_info prev_stmt_info;
3401 int nunits_in;
3402 int nunits_out;
3403 tree vectype_out;
3404 int ncopies;
3405 int j, i;
3406 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3407 tree vop0, vop1, vop2;
3408 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3409 int vf;
3410
3411 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3412 return false;
3413
3414 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3415 return false;
3416
3417 /* Is STMT a vectorizable binary/unary operation? */
3418 if (!is_gimple_assign (stmt))
3419 return false;
3420
3421 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3422 return false;
3423
3424 code = gimple_assign_rhs_code (stmt);
3425
3426 /* For pointer addition, we should use the normal plus for
3427 the vector addition. */
3428 if (code == POINTER_PLUS_EXPR)
3429 code = PLUS_EXPR;
3430
3431 /* Support only unary, binary or ternary operations. */
3432 op_type = TREE_CODE_LENGTH (code);
3433 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3434 {
3435 if (vect_print_dump_info (REPORT_DETAILS))
3436 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3437 op_type);
3438 return false;
3439 }
3440
3441 scalar_dest = gimple_assign_lhs (stmt);
3442 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3443
3444 /* Most operations cannot handle bit-precision types without extra
3445 truncations. */
3446 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3447 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3448 /* Exceptions are bitwise binary operations. */
3449 && code != BIT_IOR_EXPR
3450 && code != BIT_XOR_EXPR
3451 && code != BIT_AND_EXPR)
3452 {
3453 if (vect_print_dump_info (REPORT_DETAILS))
3454 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3455 return false;
3456 }
3457
3458 op0 = gimple_assign_rhs1 (stmt);
3459 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3460 &def_stmt, &def, &dt[0], &vectype))
3461 {
3462 if (vect_print_dump_info (REPORT_DETAILS))
3463 fprintf (vect_dump, "use not simple.");
3464 return false;
3465 }
3466 /* If op0 is an external or constant def use a vector type with
3467 the same size as the output vector type. */
3468 if (!vectype)
3469 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3470 if (vec_stmt)
3471 gcc_assert (vectype);
3472 if (!vectype)
3473 {
3474 if (vect_print_dump_info (REPORT_DETAILS))
3475 {
3476 fprintf (vect_dump, "no vectype for scalar type ");
3477 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3478 }
3479
3480 return false;
3481 }
3482
3483 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3484 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3485 if (nunits_out != nunits_in)
3486 return false;
3487
3488 if (op_type == binary_op || op_type == ternary_op)
3489 {
3490 op1 = gimple_assign_rhs2 (stmt);
3491 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3492 &def, &dt[1]))
3493 {
3494 if (vect_print_dump_info (REPORT_DETAILS))
3495 fprintf (vect_dump, "use not simple.");
3496 return false;
3497 }
3498 }
3499 if (op_type == ternary_op)
3500 {
3501 op2 = gimple_assign_rhs3 (stmt);
3502 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3503 &def, &dt[2]))
3504 {
3505 if (vect_print_dump_info (REPORT_DETAILS))
3506 fprintf (vect_dump, "use not simple.");
3507 return false;
3508 }
3509 }
3510
3511 if (loop_vinfo)
3512 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3513 else
3514 vf = 1;
3515
3516 /* Multiple types in SLP are handled by creating the appropriate number of
3517 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3518 case of SLP. */
3519 if (slp_node || PURE_SLP_STMT (stmt_info))
3520 ncopies = 1;
3521 else
3522 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3523
3524 gcc_assert (ncopies >= 1);
3525
3526 /* Shifts are handled in vectorizable_shift (). */
3527 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3528 || code == RROTATE_EXPR)
3529 return false;
3530
3531 /* Supportable by target? */
3532
3533 vec_mode = TYPE_MODE (vectype);
3534 if (code == MULT_HIGHPART_EXPR)
3535 {
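/* For highpart multiplication ask the target via can_mult_highpart_p
instead of looking up an optab handler. */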
3536 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3537 icode = 0;
3538 else
3539 icode = CODE_FOR_nothing;
3540 }
3541 else
3542 {
3543 optab = optab_for_tree_code (code, vectype, optab_default);
3544 if (!optab)
3545 {
3546 if (vect_print_dump_info (REPORT_DETAILS))
3547 fprintf (vect_dump, "no optab.");
3548 return false;
3549 }
3550 icode = (int) optab_handler (optab, vec_mode);
3551 }
3552
3553 if (icode == CODE_FOR_nothing)
3554 {
3555 if (vect_print_dump_info (REPORT_DETAILS))
3556 fprintf (vect_dump, "op not supported by target.");
3557 /* Check only during analysis. */
3558 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3559 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3560 return false;
3561 if (vect_print_dump_info (REPORT_DETAILS))
3562 fprintf (vect_dump, "proceeding using word mode.");
3563 }
3564
3565 /* Worthwhile without SIMD support? Check only during analysis. */
3566 if (!VECTOR_MODE_P (vec_mode)
3567 && !vec_stmt
3568 && vf < vect_min_worthwhile_factor (code))
3569 {
3570 if (vect_print_dump_info (REPORT_DETAILS))
3571 fprintf (vect_dump, "not worthwhile without SIMD support.");
3572 return false;
3573 }
3574
3575 if (!vec_stmt) /* transformation not required. */
3576 {
3577 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3578 if (vect_print_dump_info (REPORT_DETAILS))
3579 fprintf (vect_dump, "=== vectorizable_operation ===");
3580 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3581 return true;
3582 }
3583
3584 /** Transform. **/
3585
3586 if (vect_print_dump_info (REPORT_DETAILS))
3587 fprintf (vect_dump, "transform binary/unary operation.");
3588
3589 /* Handle def. */
3590 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3591
3592 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3593 created in the previous stages of the recursion, so no allocation is
3594 needed, except for the case of shift with scalar shift argument. In that
3595 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3596 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3597 In case of loop-based vectorization we allocate VECs of size 1. VEC_OPRNDS1
3598 is allocated only for binary/ternary operations, and VEC_OPRNDS2 only for ternary ones. */
3599 if (!slp_node)
3600 {
3601 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3602 if (op_type == binary_op || op_type == ternary_op)
3603 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3604 if (op_type == ternary_op)
3605 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3606 }
3607
3608 /* In case the vectorization factor (VF) is bigger than the number
3609 of elements that we can fit in a vectype (nunits), we have to generate
3610 more than one vector stmt - i.e - we need to "unroll" the
3611 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3612 from one copy of the vector stmt to the next, in the field
3613 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3614 stages to find the correct vector defs to be used when vectorizing
3615 stmts that use the defs of the current stmt. The example below
3616 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3617 we need to create 4 vectorized stmts):
3618
3619 before vectorization:
3620 RELATED_STMT VEC_STMT
3621 S1: x = memref - -
3622 S2: z = x + 1 - -
3623
3624 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3625 there):
3626 RELATED_STMT VEC_STMT
3627 VS1_0: vx0 = memref0 VS1_1 -
3628 VS1_1: vx1 = memref1 VS1_2 -
3629 VS1_2: vx2 = memref2 VS1_3 -
3630 VS1_3: vx3 = memref3 - -
3631 S1: x = load - VS1_0
3632 S2: z = x + 1 - -
3633
3634 step2: vectorize stmt S2 (done here):
3635 To vectorize stmt S2 we first need to find the relevant vector
3636 def for the first operand 'x'. This is, as usual, obtained from
3637 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3638 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3639 relevant vector def 'vx0'. Having found 'vx0' we can generate
3640 the vector stmt VS2_0, and as usual, record it in the
3641 STMT_VINFO_VEC_STMT of stmt S2.
3642 When creating the second copy (VS2_1), we obtain the relevant vector
3643 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3644 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3645 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3646 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3647 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3648 chain of stmts and pointers:
3649 RELATED_STMT VEC_STMT
3650 VS1_0: vx0 = memref0 VS1_1 -
3651 VS1_1: vx1 = memref1 VS1_2 -
3652 VS1_2: vx2 = memref2 VS1_3 -
3653 VS1_3: vx3 = memref3 - -
3654 S1: x = load - VS1_0
3655 VS2_0: vz0 = vx0 + v1 VS2_1 -
3656 VS2_1: vz1 = vx1 + v1 VS2_2 -
3657 VS2_2: vz2 = vx2 + v1 VS2_3 -
3658 VS2_3: vz3 = vx3 + v1 - -
3659 S2: z = x + 1 - VS2_0 */
3660
3661 prev_stmt_info = NULL;
3662 for (j = 0; j < ncopies; j++)
3663 {
3664 /* Handle uses. */
3665 if (j == 0)
3666 {
3667 if (op_type == binary_op || op_type == ternary_op)
3668 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3669 slp_node, -1);
3670 else
3671 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3672 slp_node, -1);
3673 if (op_type == ternary_op)
3674 {
3675 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3676 VEC_quick_push (tree, vec_oprnds2,
3677 vect_get_vec_def_for_operand (op2, stmt, NULL));
3678 }
3679 }
3680 else
3681 {
3682 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3683 if (op_type == ternary_op)
3684 {
3685 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3686 VEC_quick_push (tree, vec_oprnds2,
3687 vect_get_vec_def_for_stmt_copy (dt[2],
3688 vec_oprnd));
3689 }
3690 }
3691
3692 /* Arguments are ready. Create the new vector stmt. */
3693 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3694 {
3695 vop1 = ((op_type == binary_op || op_type == ternary_op)
3696 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3697 vop2 = ((op_type == ternary_op)
3698 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3699 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3700 vop0, vop1, vop2);
3701 new_temp = make_ssa_name (vec_dest, new_stmt);
3702 gimple_assign_set_lhs (new_stmt, new_temp);
3703 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3704 if (slp_node)
3705 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3706 }
3707
3708 if (slp_node)
3709 continue;
3710
3711 if (j == 0)
3712 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3713 else
3714 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3715 prev_stmt_info = vinfo_for_stmt (new_stmt);
3716 }
3717
3718 VEC_free (tree, heap, vec_oprnds0);
3719 if (vec_oprnds1)
3720 VEC_free (tree, heap, vec_oprnds1);
3721 if (vec_oprnds2)
3722 VEC_free (tree, heap, vec_oprnds2);
3723
3724 return true;
3725 }
3726
3727
3728 /* Function vectorizable_store.
3729
3730 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3731 can be vectorized.
3732 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3733 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3734 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3735
3736 static bool
3737 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3738 slp_tree slp_node)
3739 {
3740 tree scalar_dest;
3741 tree data_ref;
3742 tree op;
3743 tree vec_oprnd = NULL_TREE;
3744 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3745 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3746 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3747 tree elem_type;
3748 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3749 struct loop *loop = NULL;
3750 enum machine_mode vec_mode;
3751 tree dummy;
3752 enum dr_alignment_support alignment_support_scheme;
3753 tree def;
3754 gimple def_stmt;
3755 enum vect_def_type dt;
3756 stmt_vec_info prev_stmt_info = NULL;
3757 tree dataref_ptr = NULL_TREE;
3758 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3759 int ncopies;
3760 int j;
3761 gimple next_stmt, first_stmt = NULL;
3762 bool grouped_store = false;
3763 bool store_lanes_p = false;
3764 unsigned int group_size, i;
3765 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3766 bool inv_p;
3767 VEC(tree,heap) *vec_oprnds = NULL;
3768 bool slp = (slp_node != NULL);
3769 unsigned int vec_num;
3770 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3771 tree aggr_type;
3772
3773 if (loop_vinfo)
3774 loop = LOOP_VINFO_LOOP (loop_vinfo);
3775
3776 /* Multiple types in SLP are handled by creating the appropriate number of
3777 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3778 case of SLP. */
3779 if (slp || PURE_SLP_STMT (stmt_info))
3780 ncopies = 1;
3781 else
3782 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3783
3784 gcc_assert (ncopies >= 1);
3785
3786 /* FORNOW. This restriction should be relaxed. */
3787 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3788 {
3789 if (vect_print_dump_info (REPORT_DETAILS))
3790 fprintf (vect_dump, "multiple types in nested loop.");
3791 return false;
3792 }
3793
3794 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3795 return false;
3796
3797 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3798 return false;
3799
3800 /* Is vectorizable store? */
3801
3802 if (!is_gimple_assign (stmt))
3803 return false;
3804
3805 scalar_dest = gimple_assign_lhs (stmt);
3806 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3807 && is_pattern_stmt_p (stmt_info))
3808 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3809 if (TREE_CODE (scalar_dest) != ARRAY_REF
3810 && TREE_CODE (scalar_dest) != INDIRECT_REF
3811 && TREE_CODE (scalar_dest) != COMPONENT_REF
3812 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3813 && TREE_CODE (scalar_dest) != REALPART_EXPR
3814 && TREE_CODE (scalar_dest) != MEM_REF)
3815 return false;
3816
3817 gcc_assert (gimple_assign_single_p (stmt));
3818 op = gimple_assign_rhs1 (stmt);
3819 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3820 &def, &dt))
3821 {
3822 if (vect_print_dump_info (REPORT_DETAILS))
3823 fprintf (vect_dump, "use not simple.");
3824 return false;
3825 }
3826
3827 elem_type = TREE_TYPE (vectype);
3828 vec_mode = TYPE_MODE (vectype);
3829
3830 /* FORNOW. In some cases we can vectorize even if the data type is not
3831 supported (e.g. array initialization with 0). */
3832 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3833 return false;
3834
3835 if (!STMT_VINFO_DATA_REF (stmt_info))
3836 return false;
3837
3838 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3839 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3840 size_zero_node) < 0)
3841 {
3842 if (vect_print_dump_info (REPORT_DETAILS))
3843 fprintf (vect_dump, "negative step for store.");
3844 return false;
3845 }
3846
3847 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3848 {
3849 grouped_store = true;
3850 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3851 if (!slp && !PURE_SLP_STMT (stmt_info))
3852 {
3853 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3854 if (vect_store_lanes_supported (vectype, group_size))
3855 store_lanes_p = true;
3856 else if (!vect_grouped_store_supported (vectype, group_size))
3857 return false;
3858 }
3859
3860 if (first_stmt == stmt)
3861 {
3862 /* STMT is the leader of the group. Check the operands of all the
3863 stmts of the group. */
3864 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3865 while (next_stmt)
3866 {
3867 gcc_assert (gimple_assign_single_p (next_stmt));
3868 op = gimple_assign_rhs1 (next_stmt);
3869 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3870 &def_stmt, &def, &dt))
3871 {
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "use not simple.");
3874 return false;
3875 }
3876 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3877 }
3878 }
3879 }
3880
3881 if (!vec_stmt) /* transformation not required. */
3882 {
3883 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3884 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL, NULL);
3885 return true;
3886 }
3887
3888 /** Transform. **/
3889
3890 if (grouped_store)
3891 {
3892 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3893 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3894
3895 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3896
3897 /* FORNOW */
3898 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3899
3900 /* We vectorize all the stmts of the interleaving group when we
3901 reach the last stmt in the group. */
3902 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3903 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3904 && !slp)
3905 {
3906 *vec_stmt = NULL;
3907 return true;
3908 }
3909
3910 if (slp)
3911 {
3912 grouped_store = false;
3913 /* VEC_NUM is the number of vect stmts to be created for this
3914 group. */
3915 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3916 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3917 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3918 op = gimple_assign_rhs1 (first_stmt);
3919 }
3920 else
3921 /* VEC_NUM is the number of vect stmts to be created for this
3922 group. */
3923 vec_num = group_size;
3924 }
3925 else
3926 {
3927 first_stmt = stmt;
3928 first_dr = dr;
3929 group_size = vec_num = 1;
3930 }
3931
3932 if (vect_print_dump_info (REPORT_DETAILS))
3933 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3934
3935 dr_chain = VEC_alloc (tree, heap, group_size);
3936 oprnds = VEC_alloc (tree, heap, group_size);
3937
3938 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3939 gcc_assert (alignment_support_scheme);
3940 /* Targets with store-lane instructions must not require explicit
3941 realignment. */
3942 gcc_assert (!store_lanes_p
3943 || alignment_support_scheme == dr_aligned
3944 || alignment_support_scheme == dr_unaligned_supported);
3945
3946 if (store_lanes_p)
3947 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3948 else
3949 aggr_type = vectype;
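
  /* E.g. (illustrative numbers only): for a group of vec_num == 4 V4SI
     vectors, store_lanes_p makes AGGR_TYPE the array type int[16], so a
     single IFN_STORE_LANES call below writes the whole interleaved group;
     otherwise AGGR_TYPE is just VECTYPE and each vector is stored through
     its own vectype-sized MEM_REF.  */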
3950
3951 /* In case the vectorization factor (VF) is bigger than the number
3952 of elements that we can fit in a vectype (nunits), we have to generate
3953 more than one vector stmt - i.e - we need to "unroll" the
3954 vector stmt by a factor VF/nunits. For more details see documentation in
3955 vect_get_vec_def_for_copy_stmt. */
3956
3957 /* In case of interleaving (non-unit grouped access):
3958
3959 S1: &base + 2 = x2
3960 S2: &base = x0
3961 S3: &base + 1 = x1
3962 S4: &base + 3 = x3
3963
3964 We create vectorized stores starting from the base address (the access of
3965 the first stmt in the chain, S2 in the above example) when the last store
3966 stmt of the chain (S4) is reached:
3967
3968 VS1: &base = vx2
3969 VS2: &base + vec_size*1 = vx0
3970 VS3: &base + vec_size*2 = vx1
3971 VS4: &base + vec_size*3 = vx3
3972
3973 Then permutation statements are generated:
3974
3975 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3976 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3977 ...
3978
3979 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3980 (the order of the data-refs in the output of vect_permute_store_chain
3981 corresponds to the order of scalar stmts in the interleaving chain - see
3982 the documentation of vect_permute_store_chain()).
3983
3984 In case of both multiple types and interleaving, above vector stores and
3985 permutation stmts are created for every copy. The result vector stmts are
3986 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3987 STMT_VINFO_RELATED_STMT for the next copies.
3988 */
3989
3990 prev_stmt_info = NULL;
3991 for (j = 0; j < ncopies; j++)
3992 {
3993 gimple new_stmt;
3994 gimple ptr_incr;
3995
3996 if (j == 0)
3997 {
3998 if (slp)
3999 {
4000 /* Get vectorized arguments for SLP_NODE. */
4001 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4002 NULL, slp_node, -1);
4003
4004 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4005 }
4006 else
4007 {
4008 /* For interleaved stores we collect vectorized defs for all the
4009 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4010 used as an input to vect_permute_store_chain(), and OPRNDS as
4011 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4012
4013 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4014 OPRNDS are of size 1. */
4015 next_stmt = first_stmt;
4016 for (i = 0; i < group_size; i++)
4017 {
4018 /* Since gaps are not supported for interleaved stores,
4019 GROUP_SIZE is the exact number of stmts in the chain.
4020 Therefore, NEXT_STMT can't be NULL. If there is no
4021 interleaving, GROUP_SIZE is 1, and only one
4022 iteration of the loop will be executed. */
4023 gcc_assert (next_stmt
4024 && gimple_assign_single_p (next_stmt));
4025 op = gimple_assign_rhs1 (next_stmt);
4026
4027 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4028 NULL);
4029 VEC_quick_push(tree, dr_chain, vec_oprnd);
4030 VEC_quick_push(tree, oprnds, vec_oprnd);
4031 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4032 }
4033 }
4034
4035 /* We should have caught mismatched types earlier. */
4036 gcc_assert (useless_type_conversion_p (vectype,
4037 TREE_TYPE (vec_oprnd)));
4038 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4039 NULL_TREE, &dummy, gsi,
4040 &ptr_incr, false, &inv_p);
4041 gcc_assert (bb_vinfo || !inv_p);
4042 }
4043 else
4044 {
4045 /* For interleaved stores we created vectorized defs for all the
4046 defs stored in OPRNDS in the previous iteration (previous copy).
4047 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4048 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4049 next copy.
4050 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4051 OPRNDS are of size 1. */
4052 for (i = 0; i < group_size; i++)
4053 {
4054 op = VEC_index (tree, oprnds, i);
4055 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4056 &def, &dt);
4057 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4058 VEC_replace(tree, dr_chain, i, vec_oprnd);
4059 VEC_replace(tree, oprnds, i, vec_oprnd);
4060 }
4061 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4062 TYPE_SIZE_UNIT (aggr_type));
4063 }
4064
4065 if (store_lanes_p)
4066 {
4067 tree vec_array;
4068
4069 /* Combine all the vectors into an array. */
4070 vec_array = create_vector_array (vectype, vec_num);
4071 for (i = 0; i < vec_num; i++)
4072 {
4073 vec_oprnd = VEC_index (tree, dr_chain, i);
4074 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4075 }
4076
4077 /* Emit:
4078 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4079 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4080 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4081 gimple_call_set_lhs (new_stmt, data_ref);
4082 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4083 }
4084 else
4085 {
4086 new_stmt = NULL;
4087 if (grouped_store)
4088 {
4089 result_chain = VEC_alloc (tree, heap, group_size);
4090 /* Permute. */
4091 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4092 &result_chain);
4093 }
4094
4095 next_stmt = first_stmt;
4096 for (i = 0; i < vec_num; i++)
4097 {
4098 unsigned align, misalign;
4099
4100 if (i > 0)
4101 /* Bump the vector pointer. */
4102 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4103 stmt, NULL_TREE);
4104
4105 if (slp)
4106 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4107 else if (grouped_store)
4108 /* For grouped stores vectorized defs are interleaved in
4109 vect_permute_store_chain(). */
4110 vec_oprnd = VEC_index (tree, result_chain, i);
4111
4112 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4113 build_int_cst (reference_alias_ptr_type
4114 (DR_REF (first_dr)), 0));
4115 align = TYPE_ALIGN_UNIT (vectype);
4116 if (aligned_access_p (first_dr))
4117 misalign = 0;
4118 else if (DR_MISALIGNMENT (first_dr) == -1)
4119 {
4120 TREE_TYPE (data_ref)
4121 = build_aligned_type (TREE_TYPE (data_ref),
4122 TYPE_ALIGN (elem_type));
4123 align = TYPE_ALIGN_UNIT (elem_type);
4124 misalign = 0;
4125 }
4126 else
4127 {
4128 TREE_TYPE (data_ref)
4129 = build_aligned_type (TREE_TYPE (data_ref),
4130 TYPE_ALIGN (elem_type));
4131 misalign = DR_MISALIGNMENT (first_dr);
4132 }
4133 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4134 misalign);
4135
4136 /* Arguments are ready. Create the new vector stmt. */
4137 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4138 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4139
4140 if (slp)
4141 continue;
4142
4143 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4144 if (!next_stmt)
4145 break;
4146 }
4147 }
4148 if (!slp)
4149 {
4150 if (j == 0)
4151 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4152 else
4153 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4154 prev_stmt_info = vinfo_for_stmt (new_stmt);
4155 }
4156 }
4157
4158 VEC_free (tree, heap, dr_chain);
4159 VEC_free (tree, heap, oprnds);
4160 if (result_chain)
4161 VEC_free (tree, heap, result_chain);
4162 if (vec_oprnds)
4163 VEC_free (tree, heap, vec_oprnds);
4164
4165 return true;
4166 }
4167
4168 /* Given a vector type VECTYPE and a permutation SEL, return
4169 the VECTOR_CST mask that implements the permutation of the
4170 vector elements. If that is impossible to do, return NULL. */
4171
4172 tree
4173 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4174 {
4175 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4176 int i, nunits;
4177
4178 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4179
4180 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4181 return NULL;
4182
4183 mask_elt_type = lang_hooks.types.type_for_mode
4184 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4185 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4186
4187 mask_elts = XALLOCAVEC (tree, nunits);
4188 for (i = nunits - 1; i >= 0; i--)
4189 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4190 mask_vec = build_vector (mask_type, mask_elts);
4191
4192 return mask_vec;
4193 }
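
/* For illustration (hypothetical values, not from the sources): for a V4SI
   VECTYPE and SEL = {3, 2, 1, 0}, vect_gen_perm_mask builds the VECTOR_CST
   {3, 2, 1, 0} of a 4-element integer mask type, suitable as the third
   operand of a VEC_PERM_EXPR that reverses its input, provided
   can_vec_perm_p accepted the selector for the target.  */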
4194
4195 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4196 reversal of the vector elements. If that is impossible to do,
4197 return NULL. */
4198
4199 static tree
4200 perm_mask_for_reverse (tree vectype)
4201 {
4202 int i, nunits;
4203 unsigned char *sel;
4204
4205 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4206 sel = XALLOCAVEC (unsigned char, nunits);
4207
4208 for (i = 0; i < nunits; ++i)
4209 sel[i] = nunits - 1 - i;
4210
4211 return vect_gen_perm_mask (vectype, sel);
4212 }
4213
4214 /* Given vector variables X and Y that were generated for the scalar
4215 STMT, generate instructions to permute the vector elements of X and Y
4216 using permutation mask MASK_VEC, insert them at *GSI, and return the
4217 permuted vector variable. */
4218
4219 static tree
4220 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4221 gimple_stmt_iterator *gsi)
4222 {
4223 tree vectype = TREE_TYPE (x);
4224 tree perm_dest, data_ref;
4225 gimple perm_stmt;
4226
4227 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4228 data_ref = make_ssa_name (perm_dest, NULL);
4229
4230 /* Generate the permute statement. */
4231 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4232 x, y, mask_vec);
4233 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4234
4235 return data_ref;
4236 }
4237
4238 /* vectorizable_load.
4239
4240 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4241 can be vectorized.
4242 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4243 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4244 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4245
4246 static bool
4247 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4248 slp_tree slp_node, slp_instance slp_node_instance)
4249 {
4250 tree scalar_dest;
4251 tree vec_dest = NULL;
4252 tree data_ref = NULL;
4253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4254 stmt_vec_info prev_stmt_info;
4255 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4256 struct loop *loop = NULL;
4257 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4258 bool nested_in_vect_loop = false;
4259 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4260 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4261 tree elem_type;
4262 tree new_temp;
4263 enum machine_mode mode;
4264 gimple new_stmt = NULL;
4265 tree dummy;
4266 enum dr_alignment_support alignment_support_scheme;
4267 tree dataref_ptr = NULL_TREE;
4268 gimple ptr_incr;
4269 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4270 int ncopies;
4271 int i, j, group_size;
4272 tree msq = NULL_TREE, lsq;
4273 tree offset = NULL_TREE;
4274 tree realignment_token = NULL_TREE;
4275 gimple phi = NULL;
4276 VEC(tree,heap) *dr_chain = NULL;
4277 bool grouped_load = false;
4278 bool load_lanes_p = false;
4279 gimple first_stmt;
4280 bool inv_p;
4281 bool negative = false;
4282 bool compute_in_loop = false;
4283 struct loop *at_loop;
4284 int vec_num;
4285 bool slp = (slp_node != NULL);
4286 bool slp_perm = false;
4287 enum tree_code code;
4288 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4289 int vf;
4290 tree aggr_type;
4291 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4292 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4293 tree stride_base, stride_step;
4294 int gather_scale = 1;
4295 enum vect_def_type gather_dt = vect_unknown_def_type;
4296
4297 if (loop_vinfo)
4298 {
4299 loop = LOOP_VINFO_LOOP (loop_vinfo);
4300 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4301 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4302 }
4303 else
4304 vf = 1;
4305
4306 /* Multiple types in SLP are handled by creating the appropriate number of
4307 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4308 case of SLP. */
4309 if (slp || PURE_SLP_STMT (stmt_info))
4310 ncopies = 1;
4311 else
4312 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4313
4314 gcc_assert (ncopies >= 1);
4315
4316 /* FORNOW. This restriction should be relaxed. */
4317 if (nested_in_vect_loop && ncopies > 1)
4318 {
4319 if (vect_print_dump_info (REPORT_DETAILS))
4320 fprintf (vect_dump, "multiple types in nested loop.");
4321 return false;
4322 }
4323
4324 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4325 return false;
4326
4327 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4328 return false;
4329
4330 /* Is vectorizable load? */
4331 if (!is_gimple_assign (stmt))
4332 return false;
4333
4334 scalar_dest = gimple_assign_lhs (stmt);
4335 if (TREE_CODE (scalar_dest) != SSA_NAME)
4336 return false;
4337
4338 code = gimple_assign_rhs_code (stmt);
4339 if (code != ARRAY_REF
4340 && code != INDIRECT_REF
4341 && code != COMPONENT_REF
4342 && code != IMAGPART_EXPR
4343 && code != REALPART_EXPR
4344 && code != MEM_REF
4345 && TREE_CODE_CLASS (code) != tcc_declaration)
4346 return false;
4347
4348 if (!STMT_VINFO_DATA_REF (stmt_info))
4349 return false;
4350
4351 elem_type = TREE_TYPE (vectype);
4352 mode = TYPE_MODE (vectype);
4353
4354 /* FORNOW. In some cases we can vectorize even if the data type is not
4355 supported (e.g. data copies). */
4356 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4357 {
4358 if (vect_print_dump_info (REPORT_DETAILS))
4359 fprintf (vect_dump, "Aligned load, but unsupported type.");
4360 return false;
4361 }
4362
4363 /* Check if the load is a part of an interleaving chain. */
4364 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4365 {
4366 grouped_load = true;
4367 /* FORNOW */
4368 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4369
4370 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4371 if (!slp && !PURE_SLP_STMT (stmt_info))
4372 {
4373 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4374 if (vect_load_lanes_supported (vectype, group_size))
4375 load_lanes_p = true;
4376 else if (!vect_grouped_load_supported (vectype, group_size))
4377 return false;
4378 }
4379 }
4380
4381
4382 if (STMT_VINFO_GATHER_P (stmt_info))
4383 {
4384 gimple def_stmt;
4385 tree def;
4386 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4387 &gather_off, &gather_scale);
4388 gcc_assert (gather_decl);
4389 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4390 &def_stmt, &def, &gather_dt,
4391 &gather_off_vectype))
4392 {
4393 if (vect_print_dump_info (REPORT_DETAILS))
4394 fprintf (vect_dump, "gather index use not simple.");
4395 return false;
4396 }
4397 }
4398 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4399 {
4400 if (!vect_check_strided_load (stmt, loop_vinfo,
4401 &stride_base, &stride_step))
4402 return false;
4403 }
4404 else
4405 {
4406 negative = tree_int_cst_compare (nested_in_vect_loop
4407 ? STMT_VINFO_DR_STEP (stmt_info)
4408 : DR_STEP (dr),
4409 size_zero_node) < 0;
4410 if (negative && ncopies > 1)
4411 {
4412 if (vect_print_dump_info (REPORT_DETAILS))
4413 fprintf (vect_dump, "multiple types with negative step.");
4414 return false;
4415 }
4416
4417 if (negative)
4418 {
4419 gcc_assert (!grouped_load);
4420 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4421 if (alignment_support_scheme != dr_aligned
4422 && alignment_support_scheme != dr_unaligned_supported)
4423 {
4424 if (vect_print_dump_info (REPORT_DETAILS))
4425 fprintf (vect_dump, "negative step but alignment required.");
4426 return false;
4427 }
4428 if (!perm_mask_for_reverse (vectype))
4429 {
4430 if (vect_print_dump_info (REPORT_DETAILS))
4431 fprintf (vect_dump, "negative step and reversing not supported.");
4432 return false;
4433 }
4434 }
4435 }
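
  /* A sketch of the negative-step case handled above (hypothetical source,
     not from this file): for a loop such as
	for (i = n - 1; i >= 0; i--)
	  ... = a[i];
     DR_STEP is negative, so during the transform the vector is loaded with a
     negative OFFSET (set further below) and then reversed with the
     VEC_PERM_EXPR mask from perm_mask_for_reverse; hence both a supported
     (un)aligned access and a target-supported reversing permutation are
     required here.  */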
4436
4437 if (!vec_stmt) /* transformation not required. */
4438 {
4439 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4440 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL);
4441 return true;
4442 }
4443
4444 if (vect_print_dump_info (REPORT_DETAILS))
4445 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4446
4447 /** Transform. **/
4448
4449 if (STMT_VINFO_GATHER_P (stmt_info))
4450 {
4451 tree vec_oprnd0 = NULL_TREE, op;
4452 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4453 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4454 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4455 edge pe = loop_preheader_edge (loop);
4456 gimple_seq seq;
4457 basic_block new_bb;
4458 enum { NARROW, NONE, WIDEN } modifier;
4459 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4460
4461 if (nunits == gather_off_nunits)
4462 modifier = NONE;
4463 else if (nunits == gather_off_nunits / 2)
4464 {
4465 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4466 modifier = WIDEN;
4467
4468 for (i = 0; i < gather_off_nunits; ++i)
4469 sel[i] = i | nunits;
4470
4471 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4472 gcc_assert (perm_mask != NULL_TREE);
4473 }
4474 else if (nunits == gather_off_nunits * 2)
4475 {
4476 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4477 modifier = NARROW;
4478
4479 for (i = 0; i < nunits; ++i)
4480 sel[i] = i < gather_off_nunits
4481 ? i : i + nunits - gather_off_nunits;
4482
4483 perm_mask = vect_gen_perm_mask (vectype, sel);
4484 gcc_assert (perm_mask != NULL_TREE);
4485 ncopies *= 2;
4486 }
4487 else
4488 gcc_unreachable ();
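
      /* Illustrative sizes (an assumption, not taken from the sources): for a
	 V4DF destination gathered with a V8SI offset vector, nunits == 4 and
	 gather_off_nunits == 8, so modifier == WIDEN and every odd copy first
	 permutes the high half of the offset vector into place; with the
	 ratio reversed, modifier == NARROW, ncopies is doubled, and each pair
	 of gather results is later merged by permute_vec_elements.  */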
4489
4490 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4491 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4492 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4493 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4494 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4495 scaletype = TREE_VALUE (arglist);
4496 gcc_checking_assert (types_compatible_p (srctype, rettype)
4497 && types_compatible_p (srctype, masktype));
4498
4499 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4500
4501 ptr = fold_convert (ptrtype, gather_base);
4502 if (!is_gimple_min_invariant (ptr))
4503 {
4504 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4505 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4506 gcc_assert (!new_bb);
4507 }
4508
4509 /* Currently we support only unconditional gather loads,
4510 so mask should be all ones. */
4511 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4512 mask = build_int_cst (TREE_TYPE (masktype), -1);
4513 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4514 {
4515 REAL_VALUE_TYPE r;
4516 long tmp[6];
4517 for (j = 0; j < 6; ++j)
4518 tmp[j] = -1;
4519 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4520 mask = build_real (TREE_TYPE (masktype), r);
4521 }
4522 else
4523 gcc_unreachable ();
4524 mask = build_vector_from_val (masktype, mask);
4525 mask = vect_init_vector (stmt, mask, masktype, NULL);
4526
4527 scale = build_int_cst (scaletype, gather_scale);
4528
4529 prev_stmt_info = NULL;
4530 for (j = 0; j < ncopies; ++j)
4531 {
4532 if (modifier == WIDEN && (j & 1))
4533 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4534 perm_mask, stmt, gsi);
4535 else if (j == 0)
4536 op = vec_oprnd0
4537 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4538 else
4539 op = vec_oprnd0
4540 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4541
4542 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4543 {
4544 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4545 == TYPE_VECTOR_SUBPARTS (idxtype));
4546 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4547 add_referenced_var (var);
4548 var = make_ssa_name (var, NULL);
4549 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4550 new_stmt
4551 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4552 op, NULL_TREE);
4553 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4554 op = var;
4555 }
4556
4557 new_stmt
4558 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4559
4560 if (!useless_type_conversion_p (vectype, rettype))
4561 {
4562 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4563 == TYPE_VECTOR_SUBPARTS (rettype));
4564 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4565 add_referenced_var (var);
4566 op = make_ssa_name (var, new_stmt);
4567 gimple_call_set_lhs (new_stmt, op);
4568 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4569 var = make_ssa_name (vec_dest, NULL);
4570 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4571 new_stmt
4572 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4573 NULL_TREE);
4574 }
4575 else
4576 {
4577 var = make_ssa_name (vec_dest, new_stmt);
4578 gimple_call_set_lhs (new_stmt, var);
4579 }
4580
4581 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4582
4583 if (modifier == NARROW)
4584 {
4585 if ((j & 1) == 0)
4586 {
4587 prev_res = var;
4588 continue;
4589 }
4590 var = permute_vec_elements (prev_res, var,
4591 perm_mask, stmt, gsi);
4592 new_stmt = SSA_NAME_DEF_STMT (var);
4593 }
4594
4595 if (prev_stmt_info == NULL)
4596 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4597 else
4598 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4599 prev_stmt_info = vinfo_for_stmt (new_stmt);
4600 }
4601 return true;
4602 }
4603 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4604 {
4605 gimple_stmt_iterator incr_gsi;
4606 bool insert_after;
4607 gimple incr;
4608 tree offvar;
4609 tree ref = DR_REF (dr);
4610 tree ivstep;
4611 tree running_off;
4612 VEC(constructor_elt, gc) *v = NULL;
4613 gimple_seq stmts = NULL;
4614
4615 gcc_assert (stride_base && stride_step);
4616
4617 /* For a load with a loop-invariant stride that is not a power of 2
4618 (i.e. not a grouped access), like so:
4619
4620 for (i = 0; i < n; i += stride)
4621 ... = array[i];
4622
4623 we generate a new induction variable and new accesses to
4624 form a new vector (or vectors, depending on ncopies):
4625
4626 for (j = 0; ; j += VF*stride)
4627 tmp1 = array[j];
4628 tmp2 = array[j + stride];
4629 ...
4630 vectemp = {tmp1, tmp2, ...}
4631 */
4632
4633 ivstep = stride_step;
4634 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4635 build_int_cst (TREE_TYPE (ivstep), vf));
4636
4637 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4638
4639 create_iv (stride_base, ivstep, NULL,
4640 loop, &incr_gsi, insert_after,
4641 &offvar, NULL);
4642 incr = gsi_stmt (incr_gsi);
4643 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4644
4645 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4646 if (stmts)
4647 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4648
4649 prev_stmt_info = NULL;
4650 running_off = offvar;
4651 for (j = 0; j < ncopies; j++)
4652 {
4653 tree vec_inv;
4654
4655 v = VEC_alloc (constructor_elt, gc, nunits);
4656 for (i = 0; i < nunits; i++)
4657 {
4658 tree newref, newoff;
4659 gimple incr;
4660 if (TREE_CODE (ref) == ARRAY_REF)
4661 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4662 unshare_expr (TREE_OPERAND (ref, 0)),
4663 running_off,
4664 NULL_TREE, NULL_TREE);
4665 else
4666 newref = build2 (MEM_REF, TREE_TYPE (ref),
4667 running_off,
4668 TREE_OPERAND (ref, 1));
4669
4670 newref = force_gimple_operand_gsi (gsi, newref, true,
4671 NULL_TREE, true,
4672 GSI_SAME_STMT);
4673 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4674 newoff = SSA_NAME_VAR (running_off);
4675 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4676 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4677 running_off, stride_step);
4678 else
4679 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4680 running_off, stride_step);
4681 newoff = make_ssa_name (newoff, incr);
4682 gimple_assign_set_lhs (incr, newoff);
4683 vect_finish_stmt_generation (stmt, incr, gsi);
4684
4685 running_off = newoff;
4686 }
4687
4688 vec_inv = build_constructor (vectype, v);
4689 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4690 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4691
4692 if (j == 0)
4693 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4694 else
4695 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4696 prev_stmt_info = vinfo_for_stmt (new_stmt);
4697 }
4698 return true;
4699 }
4700
4701 if (grouped_load)
4702 {
4703 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4704 if (slp
4705 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4706 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4707 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4708
4709 /* Check if the chain of loads is already vectorized. */
4710 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4711 {
4712 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4713 return true;
4714 }
4715 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4716 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4717
4718 /* VEC_NUM is the number of vect stmts to be created for this group. */
4719 if (slp)
4720 {
4721 grouped_load = false;
4722 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4723 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4724 slp_perm = true;
4725 }
4726 else
4727 vec_num = group_size;
4728 }
4729 else
4730 {
4731 first_stmt = stmt;
4732 first_dr = dr;
4733 group_size = vec_num = 1;
4734 }
4735
4736 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4737 gcc_assert (alignment_support_scheme);
4738 /* Targets with load-lane instructions must not require explicit
4739 realignment. */
4740 gcc_assert (!load_lanes_p
4741 || alignment_support_scheme == dr_aligned
4742 || alignment_support_scheme == dr_unaligned_supported);
4743
4744 /* In case the vectorization factor (VF) is bigger than the number
4745 of elements that we can fit in a vectype (nunits), we have to generate
4746 more than one vector stmt - i.e - we need to "unroll" the
4747 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4748 from one copy of the vector stmt to the next, in the field
4749 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4750 stages to find the correct vector defs to be used when vectorizing
4751 stmts that use the defs of the current stmt. The example below
4752 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4753 need to create 4 vectorized stmts):
4754
4755 before vectorization:
4756 RELATED_STMT VEC_STMT
4757 S1: x = memref - -
4758 S2: z = x + 1 - -
4759
4760 step 1: vectorize stmt S1:
4761 We first create the vector stmt VS1_0, and, as usual, record a
4762 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4763 Next, we create the vector stmt VS1_1, and record a pointer to
4764 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4765 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4766 stmts and pointers:
4767 RELATED_STMT VEC_STMT
4768 VS1_0: vx0 = memref0 VS1_1 -
4769 VS1_1: vx1 = memref1 VS1_2 -
4770 VS1_2: vx2 = memref2 VS1_3 -
4771 VS1_3: vx3 = memref3 - -
4772 S1: x = load - VS1_0
4773 S2: z = x + 1 - -
4774
4775 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4776 information we recorded in RELATED_STMT field is used to vectorize
4777 stmt S2. */
4778
4779 /* In case of interleaving (non-unit grouped access):
4780
4781 S1: x2 = &base + 2
4782 S2: x0 = &base
4783 S3: x1 = &base + 1
4784 S4: x3 = &base + 3
4785
4786 Vectorized loads are created in the order of memory accesses
4787 starting from the access of the first stmt of the chain:
4788
4789 VS1: vx0 = &base
4790 VS2: vx1 = &base + vec_size*1
4791 VS3: vx3 = &base + vec_size*2
4792 VS4: vx4 = &base + vec_size*3
4793
4794 Then permutation statements are generated:
4795
4796 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4797 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4798 ...
4799
4800 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4801 (the order of the data-refs in the output of vect_permute_load_chain
4802 corresponds to the order of scalar stmts in the interleaving chain - see
4803 the documentation of vect_permute_load_chain()).
4804 The generation of permutation stmts and recording them in
4805 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4806
4807 In case of both multiple types and interleaving, the vector loads and
4808 permutation stmts above are created for every copy. The result vector
4809 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4810 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4811
4812 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4813 on a target that supports unaligned accesses (dr_unaligned_supported)
4814 we generate the following code:
4815 p = initial_addr;
4816 indx = 0;
4817 loop {
4818 p = p + indx * vectype_size;
4819 vec_dest = *(p);
4820 indx = indx + 1;
4821 }
4822
4823 Otherwise, the data reference is potentially unaligned on a target that
4824 does not support unaligned accesses (dr_explicit_realign_optimized) -
4825 then generate the following code, in which the data in each iteration is
4826 obtained by two vector loads, one from the previous iteration, and one
4827 from the current iteration:
4828 p1 = initial_addr;
4829 msq_init = *(floor(p1))
4830 p2 = initial_addr + VS - 1;
4831 realignment_token = call target_builtin;
4832 indx = 0;
4833 loop {
4834 p2 = p2 + indx * vectype_size
4835 lsq = *(floor(p2))
4836 vec_dest = realign_load (msq, lsq, realignment_token)
4837 indx = indx + 1;
4838 msq = lsq;
4839 } */
4840
4841 /* If the misalignment remains the same throughout the execution of the
4842 loop, we can create the init_addr and permutation mask at the loop
4843 preheader. Otherwise, it needs to be created inside the loop.
4844 This can only occur when vectorizing memory accesses in the inner-loop
4845 nested within an outer-loop that is being vectorized. */
4846
4847 if (nested_in_vect_loop
4848 && (TREE_INT_CST_LOW (DR_STEP (dr))
4849 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4850 {
4851 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4852 compute_in_loop = true;
4853 }
4854
4855 if ((alignment_support_scheme == dr_explicit_realign_optimized
4856 || alignment_support_scheme == dr_explicit_realign)
4857 && !compute_in_loop)
4858 {
4859 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4860 alignment_support_scheme, NULL_TREE,
4861 &at_loop);
4862 if (alignment_support_scheme == dr_explicit_realign_optimized)
4863 {
4864 phi = SSA_NAME_DEF_STMT (msq);
4865 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4866 }
4867 }
4868 else
4869 at_loop = loop;
4870
4871 if (negative)
4872 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4873
4874 if (load_lanes_p)
4875 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4876 else
4877 aggr_type = vectype;
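
  /* E.g. (illustrative numbers only): for a group of vec_num == 4 V4SI
     vectors, load_lanes_p makes AGGR_TYPE the array type int[16], so one
     IFN_LOAD_LANES call per copy fills all four vectors at once; otherwise
     AGGR_TYPE is just VECTYPE and each vector is loaded separately.  */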
4878
4879 prev_stmt_info = NULL;
4880 for (j = 0; j < ncopies; j++)
4881 {
4882 /* 1. Create the vector or array pointer update chain. */
4883 if (j == 0)
4884 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4885 offset, &dummy, gsi,
4886 &ptr_incr, false, &inv_p);
4887 else
4888 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4889 TYPE_SIZE_UNIT (aggr_type));
4890
4891 if (grouped_load || slp_perm)
4892 dr_chain = VEC_alloc (tree, heap, vec_num);
4893
4894 if (load_lanes_p)
4895 {
4896 tree vec_array;
4897
4898 vec_array = create_vector_array (vectype, vec_num);
4899
4900 /* Emit:
4901 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4902 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4903 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4904 gimple_call_set_lhs (new_stmt, vec_array);
4905 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4906
4907 /* Extract each vector into an SSA_NAME. */
4908 for (i = 0; i < vec_num; i++)
4909 {
4910 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4911 vec_array, i);
4912 VEC_quick_push (tree, dr_chain, new_temp);
4913 }
4914
4915 /* Record the mapping between SSA_NAMEs and statements. */
4916 vect_record_grouped_load_vectors (stmt, dr_chain);
4917 }
4918 else
4919 {
4920 for (i = 0; i < vec_num; i++)
4921 {
4922 if (i > 0)
4923 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4924 stmt, NULL_TREE);
4925
4926 /* 2. Create the vector-load in the loop. */
4927 switch (alignment_support_scheme)
4928 {
4929 case dr_aligned:
4930 case dr_unaligned_supported:
4931 {
4932 unsigned int align, misalign;
4933
4934 data_ref
4935 = build2 (MEM_REF, vectype, dataref_ptr,
4936 build_int_cst (reference_alias_ptr_type
4937 (DR_REF (first_dr)), 0));
4938 align = TYPE_ALIGN_UNIT (vectype);
4939 if (alignment_support_scheme == dr_aligned)
4940 {
4941 gcc_assert (aligned_access_p (first_dr));
4942 misalign = 0;
4943 }
4944 else if (DR_MISALIGNMENT (first_dr) == -1)
4945 {
4946 TREE_TYPE (data_ref)
4947 = build_aligned_type (TREE_TYPE (data_ref),
4948 TYPE_ALIGN (elem_type));
4949 align = TYPE_ALIGN_UNIT (elem_type);
4950 misalign = 0;
4951 }
4952 else
4953 {
4954 TREE_TYPE (data_ref)
4955 = build_aligned_type (TREE_TYPE (data_ref),
4956 TYPE_ALIGN (elem_type));
4957 misalign = DR_MISALIGNMENT (first_dr);
4958 }
4959 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
4960 align, misalign);
4961 break;
4962 }
4963 case dr_explicit_realign:
4964 {
4965 tree ptr, bump;
4966 tree vs_minus_1;
4967
4968 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4969
4970 if (compute_in_loop)
4971 msq = vect_setup_realignment (first_stmt, gsi,
4972 &realignment_token,
4973 dr_explicit_realign,
4974 dataref_ptr, NULL);
4975
4976 new_stmt = gimple_build_assign_with_ops
4977 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4978 build_int_cst
4979 (TREE_TYPE (dataref_ptr),
4980 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4981 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4982 gimple_assign_set_lhs (new_stmt, ptr);
4983 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4984 data_ref
4985 = build2 (MEM_REF, vectype, ptr,
4986 build_int_cst (reference_alias_ptr_type
4987 (DR_REF (first_dr)), 0));
4988 vec_dest = vect_create_destination_var (scalar_dest,
4989 vectype);
4990 new_stmt = gimple_build_assign (vec_dest, data_ref);
4991 new_temp = make_ssa_name (vec_dest, new_stmt);
4992 gimple_assign_set_lhs (new_stmt, new_temp);
4993 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4994 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4995 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4996 msq = new_temp;
4997
4998 bump = size_binop (MULT_EXPR, vs_minus_1,
4999 TYPE_SIZE_UNIT (elem_type));
5000 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5001 new_stmt = gimple_build_assign_with_ops
5002 (BIT_AND_EXPR, NULL_TREE, ptr,
5003 build_int_cst
5004 (TREE_TYPE (ptr),
5005 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5006 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
5007 gimple_assign_set_lhs (new_stmt, ptr);
5008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5009 data_ref
5010 = build2 (MEM_REF, vectype, ptr,
5011 build_int_cst (reference_alias_ptr_type
5012 (DR_REF (first_dr)), 0));
5013 break;
5014 }
5015 case dr_explicit_realign_optimized:
5016 new_stmt = gimple_build_assign_with_ops
5017 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
5018 build_int_cst
5019 (TREE_TYPE (dataref_ptr),
5020 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5021 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
5022 new_stmt);
5023 gimple_assign_set_lhs (new_stmt, new_temp);
5024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5025 data_ref
5026 = build2 (MEM_REF, vectype, new_temp,
5027 build_int_cst (reference_alias_ptr_type
5028 (DR_REF (first_dr)), 0));
5029 break;
5030 default:
5031 gcc_unreachable ();
5032 }
5033 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5034 new_stmt = gimple_build_assign (vec_dest, data_ref);
5035 new_temp = make_ssa_name (vec_dest, new_stmt);
5036 gimple_assign_set_lhs (new_stmt, new_temp);
5037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5038
5039 /* 3. Handle explicit realignment if necessary/supported.
5040 Create in loop:
5041 vec_dest = realign_load (msq, lsq, realignment_token) */
5042 if (alignment_support_scheme == dr_explicit_realign_optimized
5043 || alignment_support_scheme == dr_explicit_realign)
5044 {
5045 lsq = gimple_assign_lhs (new_stmt);
5046 if (!realignment_token)
5047 realignment_token = dataref_ptr;
5048 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5049 new_stmt
5050 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
5051 vec_dest, msq, lsq,
5052 realignment_token);
5053 new_temp = make_ssa_name (vec_dest, new_stmt);
5054 gimple_assign_set_lhs (new_stmt, new_temp);
5055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5056
5057 if (alignment_support_scheme == dr_explicit_realign_optimized)
5058 {
5059 gcc_assert (phi);
5060 if (i == vec_num - 1 && j == ncopies - 1)
5061 add_phi_arg (phi, lsq,
5062 loop_latch_edge (containing_loop),
5063 UNKNOWN_LOCATION);
5064 msq = lsq;
5065 }
5066 }
5067
5068 /* 4. Handle invariant-load. */
5069 if (inv_p && !bb_vinfo)
5070 {
5071 gimple_stmt_iterator gsi2 = *gsi;
5072 gcc_assert (!grouped_load);
5073 gsi_next (&gsi2);
5074 new_temp = vect_init_vector (stmt, scalar_dest,
5075 vectype, &gsi2);
5076 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5077 }
5078
5079 if (negative)
5080 {
5081 tree perm_mask = perm_mask_for_reverse (vectype);
5082 new_temp = permute_vec_elements (new_temp, new_temp,
5083 perm_mask, stmt, gsi);
5084 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5085 }
5086
5087 /* Collect vector loads and later create their permutation in
5088 vect_transform_grouped_load (). */
5089 if (grouped_load || slp_perm)
5090 VEC_quick_push (tree, dr_chain, new_temp);
5091
5092 /* Store vector loads in the corresponding SLP_NODE. */
5093 if (slp && !slp_perm)
5094 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5095 new_stmt);
5096 }
5097 }
5098
5099 if (slp && !slp_perm)
5100 continue;
5101
5102 if (slp_perm)
5103 {
5104 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5105 slp_node_instance, false))
5106 {
5107 VEC_free (tree, heap, dr_chain);
5108 return false;
5109 }
5110 }
5111 else
5112 {
5113 if (grouped_load)
5114 {
5115 if (!load_lanes_p)
5116 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5117 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5118 }
5119 else
5120 {
5121 if (j == 0)
5122 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5123 else
5124 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5125 prev_stmt_info = vinfo_for_stmt (new_stmt);
5126 }
5127 }
5128 if (dr_chain)
5129 VEC_free (tree, heap, dr_chain);
5130 }
5131
5132 return true;
5133 }
5134
5135 /* Function vect_is_simple_cond.
5136
5137 Input:
5138 LOOP - the loop that is being vectorized.
5139 COND - Condition that is checked for simple use.
5140
5141 Output:
5142 *COMP_VECTYPE - the vector type for the comparison.
5143
5144 Returns whether a COND can be vectorized. Checks whether the
5145 condition operands are supportable using vect_is_simple_use. */
5146
5147 static bool
5148 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5149 bb_vec_info bb_vinfo, tree *comp_vectype)
5150 {
5151 tree lhs, rhs;
5152 tree def;
5153 enum vect_def_type dt;
5154 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5155
5156 if (!COMPARISON_CLASS_P (cond))
5157 return false;
5158
5159 lhs = TREE_OPERAND (cond, 0);
5160 rhs = TREE_OPERAND (cond, 1);
5161
5162 if (TREE_CODE (lhs) == SSA_NAME)
5163 {
5164 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5165 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5166 &lhs_def_stmt, &def, &dt, &vectype1))
5167 return false;
5168 }
5169 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5170 && TREE_CODE (lhs) != FIXED_CST)
5171 return false;
5172
5173 if (TREE_CODE (rhs) == SSA_NAME)
5174 {
5175 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5176 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5177 &rhs_def_stmt, &def, &dt, &vectype2))
5178 return false;
5179 }
5180 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5181 && TREE_CODE (rhs) != FIXED_CST)
5182 return false;
5183
5184 *comp_vectype = vectype1 ? vectype1 : vectype2;
5185 return true;
5186 }
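
/* For illustration (a hypothetical case): for COND = (a_1 < b_2) with both
   operands SSA names of type int, vect_is_simple_use_1 supplies a vector
   type for each operand and *COMP_VECTYPE becomes e.g. V4SI; for
   COND = (a_1 < 10) only the SSA operand contributes a vector type, and the
   constant is merely required to be an INTEGER_CST, REAL_CST or FIXED_CST.  */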
5187
5188 /* vectorizable_condition.
5189
5190 Check if STMT is a conditional modify expression that can be vectorized.
5191 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5192 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5193 at GSI.
5194
5195 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5196 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5197 the else clause if it is 2).
5198
5199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5200
5201 bool
5202 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5203 gimple *vec_stmt, tree reduc_def, int reduc_index,
5204 slp_tree slp_node)
5205 {
5206 tree scalar_dest = NULL_TREE;
5207 tree vec_dest = NULL_TREE;
5208 tree cond_expr, then_clause, else_clause;
5209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5210 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5211 tree comp_vectype = NULL_TREE;
5212 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5213 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5214 tree vec_compare, vec_cond_expr;
5215 tree new_temp;
5216 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5217 tree def;
5218 enum vect_def_type dt, dts[4];
5219 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5220 int ncopies;
5221 enum tree_code code;
5222 stmt_vec_info prev_stmt_info = NULL;
5223 int i, j;
5224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5225 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5226 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5227
5228 if (slp_node || PURE_SLP_STMT (stmt_info))
5229 ncopies = 1;
5230 else
5231 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5232
5233 gcc_assert (ncopies >= 1);
5234 if (reduc_index && ncopies > 1)
5235 return false; /* FORNOW */
5236
5237 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5238 return false;
5239
5240 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5241 return false;
5242
5243 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5244 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5245 && reduc_def))
5246 return false;
5247
5248 /* FORNOW: not yet supported. */
5249 if (STMT_VINFO_LIVE_P (stmt_info))
5250 {
5251 if (vect_print_dump_info (REPORT_DETAILS))
5252 fprintf (vect_dump, "value used after loop.");
5253 return false;
5254 }
5255
5256 /* Is vectorizable conditional operation? */
5257 if (!is_gimple_assign (stmt))
5258 return false;
5259
5260 code = gimple_assign_rhs_code (stmt);
5261
5262 if (code != COND_EXPR)
5263 return false;
5264
5265 cond_expr = gimple_assign_rhs1 (stmt);
5266 then_clause = gimple_assign_rhs2 (stmt);
5267 else_clause = gimple_assign_rhs3 (stmt);
5268
5269 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5270 &comp_vectype)
5271 || !comp_vectype)
5272 return false;
5273
5274 if (TREE_CODE (then_clause) == SSA_NAME)
5275 {
5276 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5277 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5278 &then_def_stmt, &def, &dt))
5279 return false;
5280 }
5281 else if (TREE_CODE (then_clause) != INTEGER_CST
5282 && TREE_CODE (then_clause) != REAL_CST
5283 && TREE_CODE (then_clause) != FIXED_CST)
5284 return false;
5285
5286 if (TREE_CODE (else_clause) == SSA_NAME)
5287 {
5288 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5289 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5290 &else_def_stmt, &def, &dt))
5291 return false;
5292 }
5293 else if (TREE_CODE (else_clause) != INTEGER_CST
5294 && TREE_CODE (else_clause) != REAL_CST
5295 && TREE_CODE (else_clause) != FIXED_CST)
5296 return false;
5297
5298 if (!vec_stmt)
5299 {
5300 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5301 return expand_vec_cond_expr_p (vectype, comp_vectype);
5302 }
5303
5304 /* Transform. */
5305
5306 if (!slp_node)
5307 {
5308 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5309 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5310 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5311 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5312 }
5313
5314 /* Handle def. */
5315 scalar_dest = gimple_assign_lhs (stmt);
5316 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5317
5318 /* Handle cond expr. */
5319 for (j = 0; j < ncopies; j++)
5320 {
5321 gimple new_stmt = NULL;
5322 if (j == 0)
5323 {
5324 if (slp_node)
5325 {
5326 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5327 VEC (slp_void_p, heap) *vec_defs;
5328
5329 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5330 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5331 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5332 VEC_safe_push (tree, heap, ops, then_clause);
5333 VEC_safe_push (tree, heap, ops, else_clause);
5334 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5335 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5336 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5337 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5338 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5339
5340 VEC_free (tree, heap, ops);
5341 VEC_free (slp_void_p, heap, vec_defs);
5342 }
5343 else
5344 {
5345 gimple gtemp;
5346 vec_cond_lhs =
5347 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5348 stmt, NULL);
5349 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5350 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5351
5352 vec_cond_rhs =
5353 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5354 stmt, NULL);
5355 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5356 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5357 if (reduc_index == 1)
5358 vec_then_clause = reduc_def;
5359 else
5360 {
5361 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5362 stmt, NULL);
5363 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5364 NULL, &gtemp, &def, &dts[2]);
5365 }
5366 if (reduc_index == 2)
5367 vec_else_clause = reduc_def;
5368 else
5369 {
5370 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5371 stmt, NULL);
5372 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5373 NULL, &gtemp, &def, &dts[3]);
5374 }
5375 }
5376 }
5377 else
5378 {
5379 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5380 VEC_pop (tree, vec_oprnds0));
5381 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5382 VEC_pop (tree, vec_oprnds1));
5383 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5384 VEC_pop (tree, vec_oprnds2));
5385 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5386 VEC_pop (tree, vec_oprnds3));
5387 }
5388
5389 if (!slp_node)
5390 {
5391 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5392 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5393 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5394 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5395 }
5396
5397 /* Arguments are ready. Create the new vector stmt. */
5398 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5399 {
5400 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5401 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5402 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5403
5404 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5405 vec_cond_lhs, vec_cond_rhs);
5406 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5407 vec_compare, vec_then_clause, vec_else_clause);
5408
5409 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5410 new_temp = make_ssa_name (vec_dest, new_stmt);
5411 gimple_assign_set_lhs (new_stmt, new_temp);
5412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5413 if (slp_node)
5414 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5415 }
5416
5417 if (slp_node)
5418 continue;
5419
5420 if (j == 0)
5421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5422 else
5423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5424
5425 prev_stmt_info = vinfo_for_stmt (new_stmt);
5426 }
5427
5428 VEC_free (tree, heap, vec_oprnds0);
5429 VEC_free (tree, heap, vec_oprnds1);
5430 VEC_free (tree, heap, vec_oprnds2);
5431 VEC_free (tree, heap, vec_oprnds3);
5432
5433 return true;
5434 }
5435
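/* Illustrative sketch, not part of the original tree-vect-stmts.c: the kind
   of scalar conditional assignment that vectorizable_condition handles.  The
   array and parameter names below are hypothetical.  The COND_EXPR on the
   right-hand side of the assignment is what the transformation above replaces
   with one VEC_COND_EXPR <vb < vc, vx, vy> per vector copy.  */

static void ATTRIBUTE_UNUSED
vect_condition_example (int *a, const int *b, const int *c,
                        int x, int y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[i] < c[i] ? x : y;
}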
5436
5437 /* Make sure the statement is vectorizable. */
5438
5439 bool
5440 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5441 {
5442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5443 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5444 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5445 bool ok;
5446 tree scalar_type, vectype;
5447 gimple pattern_stmt;
5448 gimple_seq pattern_def_seq;
5449
5450 if (vect_print_dump_info (REPORT_DETAILS))
5451 {
5452 fprintf (vect_dump, "==> examining statement: ");
5453 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5454 }
5455
5456 if (gimple_has_volatile_ops (stmt))
5457 {
5458 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5459 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5460
5461 return false;
5462 }
5463
5464 /* Skip stmts that do not need to be vectorized. In loops this is expected
5465 to include:
5466 - the COND_EXPR which is the loop exit condition
5467 - any LABEL_EXPRs in the loop
5468 - computations that are used only for array indexing or loop control.
5469 In basic blocks we only analyze statements that are a part of some SLP
5470 instance; therefore, all the statements are relevant.
5471
5472 The pattern statement needs to be analyzed instead of the original statement
5473 if the original statement is not relevant. Otherwise, we analyze both
5474 statements. In basic blocks we are called from within some SLP instance
5475 traversal; there we don't analyze pattern stmts in place of the original
5476 stmts, because the pattern stmts are already part of the SLP instance. */
5477
5478 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5479 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5480 && !STMT_VINFO_LIVE_P (stmt_info))
5481 {
5482 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5483 && pattern_stmt
5484 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5485 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5486 {
5487 /* Analyze PATTERN_STMT instead of the original stmt. */
5488 stmt = pattern_stmt;
5489 stmt_info = vinfo_for_stmt (pattern_stmt);
5490 if (vect_print_dump_info (REPORT_DETAILS))
5491 {
5492 fprintf (vect_dump, "==> examining pattern statement: ");
5493 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5494 }
5495 }
5496 else
5497 {
5498 if (vect_print_dump_info (REPORT_DETAILS))
5499 fprintf (vect_dump, "irrelevant.");
5500
5501 return true;
5502 }
5503 }
5504 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5505 && node == NULL
5506 && pattern_stmt
5507 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5508 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5509 {
5510 /* Analyze PATTERN_STMT too. */
5511 if (vect_print_dump_info (REPORT_DETAILS))
5512 {
5513 fprintf (vect_dump, "==> examining pattern statement: ");
5514 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5515 }
5516
5517 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5518 return false;
5519 }
5520
5521 if (is_pattern_stmt_p (stmt_info)
5522 && node == NULL
5523 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5524 {
5525 gimple_stmt_iterator si;
5526
5527 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5528 {
5529 gimple pattern_def_stmt = gsi_stmt (si);
5530 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5531 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5532 {
5533 /* Analyze def stmt of STMT if it's a pattern stmt. */
5534 if (vect_print_dump_info (REPORT_DETAILS))
5535 {
5536 fprintf (vect_dump, "==> examining pattern def statement: ");
5537 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5538 }
5539
5540 if (!vect_analyze_stmt (pattern_def_stmt,
5541 need_to_vectorize, node))
5542 return false;
5543 }
5544 }
5545 }
5546
5547 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5548 {
5549 case vect_internal_def:
5550 break;
5551
5552 case vect_reduction_def:
5553 case vect_nested_cycle:
5554 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5555 || relevance == vect_used_in_outer_by_reduction
5556 || relevance == vect_unused_in_scope));
5557 break;
5558
5559 case vect_induction_def:
5560 case vect_constant_def:
5561 case vect_external_def:
5562 case vect_unknown_def_type:
5563 default:
5564 gcc_unreachable ();
5565 }
5566
5567 if (bb_vinfo)
5568 {
5569 gcc_assert (PURE_SLP_STMT (stmt_info));
5570
5571 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5572 if (vect_print_dump_info (REPORT_DETAILS))
5573 {
5574 fprintf (vect_dump, "get vectype for scalar type: ");
5575 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5576 }
5577
5578 vectype = get_vectype_for_scalar_type (scalar_type);
5579 if (!vectype)
5580 {
5581 if (vect_print_dump_info (REPORT_DETAILS))
5582 {
5583 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5584 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5585 }
5586 return false;
5587 }
5588
5589 if (vect_print_dump_info (REPORT_DETAILS))
5590 {
5591 fprintf (vect_dump, "vectype: ");
5592 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5593 }
5594
5595 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5596 }
5597
5598 if (STMT_VINFO_RELEVANT_P (stmt_info))
5599 {
5600 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5601 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5602 *need_to_vectorize = true;
5603 }
5604
5605 ok = true;
5606 if (!bb_vinfo
5607 && (STMT_VINFO_RELEVANT_P (stmt_info)
5608 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5609 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5610 || vectorizable_shift (stmt, NULL, NULL, NULL)
5611 || vectorizable_operation (stmt, NULL, NULL, NULL)
5612 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5613 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5614 || vectorizable_call (stmt, NULL, NULL, NULL)
5615 || vectorizable_store (stmt, NULL, NULL, NULL)
5616 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5617 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5618 else
5619 {
5620 if (bb_vinfo)
5621 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5622 || vectorizable_shift (stmt, NULL, NULL, node)
5623 || vectorizable_operation (stmt, NULL, NULL, node)
5624 || vectorizable_assignment (stmt, NULL, NULL, node)
5625 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5626 || vectorizable_call (stmt, NULL, NULL, node)
5627 || vectorizable_store (stmt, NULL, NULL, node)
5628 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5629 }
5630
5631 if (!ok)
5632 {
5633 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5634 {
5635 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5636 fprintf (vect_dump, "supported: ");
5637 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5638 }
5639
5640 return false;
5641 }
5642
5643 if (bb_vinfo)
5644 return true;
5645
5646 /* Stmts that are (also) "live" (i.e. used outside of the loop)
5647 need extra handling, except for vectorizable reductions. */
5648 if (STMT_VINFO_LIVE_P (stmt_info)
5649 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5650 ok = vectorizable_live_operation (stmt, NULL, NULL);
5651
5652 if (!ok)
5653 {
5654 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5655 {
5656 fprintf (vect_dump, "not vectorized: live stmt not ");
5657 fprintf (vect_dump, "supported: ");
5658 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5659 }
5660
5661 return false;
5662 }
5663
5664 return true;
5665 }
5666
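/* Minimal sketch, not part of the original file: how the analysis entry point
   above is typically driven.  The helper below is hypothetical; it assumes
   stmt_vec_infos have already been created for every statement of BB, walks
   the statements, and gives up as soon as one of them is not vectorizable.  */

static bool ATTRIBUTE_UNUSED
vect_analyze_bb_stmts_example (basic_block bb)
{
  bool need_to_vectorize = false;
  gimple_stmt_iterator si;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    if (!vect_analyze_stmt (gsi_stmt (si), &need_to_vectorize, NULL))
      return false;

  return true;
}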
5667
5668 /* Function vect_transform_stmt.
5669
5670 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5671
5672 bool
5673 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5674 bool *grouped_store, slp_tree slp_node,
5675 slp_instance slp_node_instance)
5676 {
5677 bool is_store = false;
5678 gimple vec_stmt = NULL;
5679 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5680 bool done;
5681
5682 switch (STMT_VINFO_TYPE (stmt_info))
5683 {
5684 case type_demotion_vec_info_type:
5685 case type_promotion_vec_info_type:
5686 case type_conversion_vec_info_type:
5687 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5688 gcc_assert (done);
5689 break;
5690
5691 case induc_vec_info_type:
5692 gcc_assert (!slp_node);
5693 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5694 gcc_assert (done);
5695 break;
5696
5697 case shift_vec_info_type:
5698 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5699 gcc_assert (done);
5700 break;
5701
5702 case op_vec_info_type:
5703 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5704 gcc_assert (done);
5705 break;
5706
5707 case assignment_vec_info_type:
5708 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5709 gcc_assert (done);
5710 break;
5711
5712 case load_vec_info_type:
5713 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5714 slp_node_instance);
5715 gcc_assert (done);
5716 break;
5717
5718 case store_vec_info_type:
5719 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5720 gcc_assert (done);
5721 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5722 {
5723 /* In case of interleaving, the whole chain is vectorized when the
5724 last store in the chain is reached. Store stmts before the last
5725 one are skipped, and their vec_stmt_info shouldn't be freed
5726 meanwhile. */
5727 *grouped_store = true;
5728 if (STMT_VINFO_VEC_STMT (stmt_info))
5729 is_store = true;
5730 }
5731 else
5732 is_store = true;
5733 break;
5734
5735 case condition_vec_info_type:
5736 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5737 gcc_assert (done);
5738 break;
5739
5740 case call_vec_info_type:
5741 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5742 stmt = gsi_stmt (*gsi);
5743 break;
5744
5745 case reduc_vec_info_type:
5746 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5747 gcc_assert (done);
5748 break;
5749
5750 default:
5751 if (!STMT_VINFO_LIVE_P (stmt_info))
5752 {
5753 if (vect_print_dump_info (REPORT_DETAILS))
5754 fprintf (vect_dump, "stmt not supported.");
5755 gcc_unreachable ();
5756 }
5757 }
5758
5759 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5760 is being vectorized, but outside the immediately enclosing loop. */
5761 if (vec_stmt
5762 && STMT_VINFO_LOOP_VINFO (stmt_info)
5763 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5764 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5765 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5766 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5767 || STMT_VINFO_RELEVANT (stmt_info) ==
5768 vect_used_in_outer_by_reduction))
5769 {
5770 struct loop *innerloop = LOOP_VINFO_LOOP (
5771 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5772 imm_use_iterator imm_iter;
5773 use_operand_p use_p;
5774 tree scalar_dest;
5775 gimple exit_phi;
5776
5777 if (vect_print_dump_info (REPORT_DETAILS))
5778 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5779
5780 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5781 (to be used when vectorizing outer-loop stmts that use the DEF of
5782 STMT). */
5783 if (gimple_code (stmt) == GIMPLE_PHI)
5784 scalar_dest = PHI_RESULT (stmt);
5785 else
5786 scalar_dest = gimple_assign_lhs (stmt);
5787
5788 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5789 {
5790 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5791 {
5792 exit_phi = USE_STMT (use_p);
5793 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5794 }
5795 }
5796 }
5797
5798 /* Handle stmts whose DEF is used outside the loop-nest that is
5799 being vectorized. */
5800 if (STMT_VINFO_LIVE_P (stmt_info)
5801 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5802 {
5803 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5804 gcc_assert (done);
5805 }
5806
5807 if (vec_stmt)
5808 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5809
5810 return is_store;
5811 }
5812
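/* Illustrative sketch, not part of the original file: the outer-loop situation
   handled just above, where a DEF computed in the inner loop is used by an
   outer-loop statement (through the inner loop's exit phi).  Names below are
   hypothetical; when the outer I loop is vectorized, the vectorized definition
   of S must be recorded at the exit phi so the outer-loop use can find it.  */

static void ATTRIBUTE_UNUSED
vect_outer_loop_use_example (int *a, const int *b, int n, int m)
{
  int i, j;
  for (i = 0; i < n; i++)       /* Outer loop: the loop being vectorized.  */
    {
      int s = 0;
      for (j = 0; j < m; j++)   /* Nested inner loop.  */
        s += b[i * m + j];
      a[i] = s;                 /* Inner-loop DEF of S used in the outer loop.  */
    }
}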
5813
5814 /* Remove a group of stores (for SLP or interleaving), free their
5815 stmt_vec_info. */
5816
5817 void
5818 vect_remove_stores (gimple first_stmt)
5819 {
5820 gimple next = first_stmt;
5821 gimple tmp;
5822 gimple_stmt_iterator next_si;
5823
5824 while (next)
5825 {
5826 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5827
5828 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5829 if (is_pattern_stmt_p (stmt_info))
5830 next = STMT_VINFO_RELATED_STMT (stmt_info);
5831 /* Free the attached stmt_vec_info and remove the stmt. */
5832 next_si = gsi_for_stmt (next);
5833 unlink_stmt_vdef (next);
5834 gsi_remove (&next_si, true);
5835 release_defs (next);
5836 free_stmt_vec_info (next);
5837 next = tmp;
5838 }
5839 }
5840
5841
5842 /* Function new_stmt_vec_info.
5843
5844 Create and initialize a new stmt_vec_info struct for STMT. */
5845
5846 stmt_vec_info
5847 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5848 bb_vec_info bb_vinfo)
5849 {
5850 stmt_vec_info res;
5851 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5852
5853 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5854 STMT_VINFO_STMT (res) = stmt;
5855 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5856 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5857 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5858 STMT_VINFO_LIVE_P (res) = false;
5859 STMT_VINFO_VECTYPE (res) = NULL;
5860 STMT_VINFO_VEC_STMT (res) = NULL;
5861 STMT_VINFO_VECTORIZABLE (res) = true;
5862 STMT_VINFO_IN_PATTERN_P (res) = false;
5863 STMT_VINFO_RELATED_STMT (res) = NULL;
5864 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5865 STMT_VINFO_DATA_REF (res) = NULL;
5866
5867 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5868 STMT_VINFO_DR_OFFSET (res) = NULL;
5869 STMT_VINFO_DR_INIT (res) = NULL;
5870 STMT_VINFO_DR_STEP (res) = NULL;
5871 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5872
5873 if (gimple_code (stmt) == GIMPLE_PHI
5874 && is_loop_header_bb_p (gimple_bb (stmt)))
5875 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5876 else
5877 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5878
5879 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5880 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5881 STMT_SLP_TYPE (res) = loop_vect;
5882 GROUP_FIRST_ELEMENT (res) = NULL;
5883 GROUP_NEXT_ELEMENT (res) = NULL;
5884 GROUP_SIZE (res) = 0;
5885 GROUP_STORE_COUNT (res) = 0;
5886 GROUP_GAP (res) = 0;
5887 GROUP_SAME_DR_STMT (res) = NULL;
5888 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5889
5890 return res;
5891 }
5892
5893
5894 /* Create the vector used to hold stmt_vec_info structs. */
5895
5896 void
5897 init_stmt_vec_info_vec (void)
5898 {
5899 gcc_assert (!stmt_vec_info_vec);
5900 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5901 }
5902
5903
5904 /* Free the vector used to hold stmt_vec_info structs. */
5905
5906 void
5907 free_stmt_vec_info_vec (void)
5908 {
5909 gcc_assert (stmt_vec_info_vec);
5910 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5911 }
5912
5913
5914 /* Free stmt vectorization related info. */
5915
5916 void
5917 free_stmt_vec_info (gimple stmt)
5918 {
5919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5920
5921 if (!stmt_info)
5922 return;
5923
5924 /* Check if this statement has a related "pattern stmt"
5925 (introduced by the vectorizer during the pattern recognition
5926 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5927 too. */
5928 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5929 {
5930 stmt_vec_info patt_info
5931 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5932 if (patt_info)
5933 {
5934 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5935 if (seq)
5936 {
5937 gimple_stmt_iterator si;
5938 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5939 free_stmt_vec_info (gsi_stmt (si));
5940 }
5941 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5942 }
5943 }
5944
5945 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5946 set_vinfo_for_stmt (stmt, NULL);
5947 free (stmt_info);
5948 }
5949
5950
5951 /* Function get_vectype_for_scalar_type_and_size.
5952
5953 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5954 by the target. */
5955
5956 static tree
5957 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5958 {
5959 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5960 enum machine_mode simd_mode;
5961 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5962 int nunits;
5963 tree vectype;
5964
5965 if (nbytes == 0)
5966 return NULL_TREE;
5967
5968 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5969 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5970 return NULL_TREE;
5971
5972 /* We can't build a vector type of elements with alignment bigger than
5973 their size. */
5974 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5975 return NULL_TREE;
5976
5977 /* For vector types of elements whose mode precision doesn't
5978 match their type's precision, we use an element type of mode
5979 precision. The vectorization routines will have to make sure
5980 they support the proper result truncation/extension.
5981 We also make sure to build vector types with INTEGER_TYPE
5982 component type only. */
5983 if (INTEGRAL_TYPE_P (scalar_type)
5984 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5985 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5986 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5987 TYPE_UNSIGNED (scalar_type));
5988
5989 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5990 When the component mode passes the above test, simply use a type
5991 corresponding to that mode. The theory is that any use that
5992 would cause problems with this will disable vectorization anyway. */
5993 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5994 && !INTEGRAL_TYPE_P (scalar_type)
5995 && !POINTER_TYPE_P (scalar_type))
5996 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5997
5998 /* If no size was supplied use the mode the target prefers. Otherwise
5999 look up a vector mode of the specified size. */
6000 if (size == 0)
6001 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6002 else
6003 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6004 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6005 if (nunits <= 1)
6006 return NULL_TREE;
6007
6008 vectype = build_vector_type (scalar_type, nunits);
6009 if (vect_print_dump_info (REPORT_DETAILS))
6010 {
6011 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
6012 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
6013 }
6014
6015 if (!vectype)
6016 return NULL_TREE;
6017
6018 if (vect_print_dump_info (REPORT_DETAILS))
6019 {
6020 fprintf (vect_dump, "vectype: ");
6021 print_generic_expr (vect_dump, vectype, TDF_SLIM);
6022 }
6023
6024 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6025 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6026 {
6027 if (vect_print_dump_info (REPORT_DETAILS))
6028 fprintf (vect_dump, "mode not supported by target.");
6029 return NULL_TREE;
6030 }
6031
6032 return vectype;
6033 }
6034
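/* Worked example (illustrative only, assuming a target whose preferred SIMD
   width is 16 bytes, e.g. SSE2): for SCALAR_TYPE == int, inner_mode is SImode
   and nbytes == 4.  With SIZE == 0 the preferred mode V4SImode is chosen;
   with SIZE == 16, mode_for_vector (SImode, 16 / 4) yields the same mode.
   Either way nunits == 16 / 4 == 4 and the returned vectype is
   "vector(4) int".  */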
6035 unsigned int current_vector_size;
6036
6037 /* Function get_vectype_for_scalar_type.
6038
6039 Returns the vector type corresponding to SCALAR_TYPE as supported
6040 by the target. */
6041
6042 tree
6043 get_vectype_for_scalar_type (tree scalar_type)
6044 {
6045 tree vectype;
6046 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6047 current_vector_size);
6048 if (vectype
6049 && current_vector_size == 0)
6050 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6051 return vectype;
6052 }
6053
6054 /* Function get_same_sized_vectype
6055
6056 Returns a vector type corresponding to SCALAR_TYPE with the same
6057 size as VECTOR_TYPE, if supported by the target.
6058
6059 tree
6060 get_same_sized_vectype (tree scalar_type, tree vector_type)
6061 {
6062 return get_vectype_for_scalar_type_and_size
6063 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6064 }
6065
6066 /* Function vect_is_simple_use.
6067
6068 Input:
6069 LOOP_VINFO - the vect info of the loop that is being vectorized.
6070 BB_VINFO - the vect info of the basic block that is being vectorized.
6071 OPERAND - operand of STMT in the loop or bb.
6072 DEF_STMT, DEF - the defining stmt and its result, set when OPERAND is an SSA_NAME.
6073
6074 Returns whether a stmt with OPERAND can be vectorized.
6075 For loops, supportable operands are constants, loop invariants, and operands
6076 that are defined by the current iteration of the loop. Unsupportable
6077 operands are those that are defined by a previous iteration of the loop (as
6078 is the case in reduction/induction computations).
6079 For basic blocks, supportable operands are constants and bb invariants.
6080 For now, operands defined outside the basic block are not supported. */
6081
6082 bool
6083 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6084 bb_vec_info bb_vinfo, gimple *def_stmt,
6085 tree *def, enum vect_def_type *dt)
6086 {
6087 basic_block bb;
6088 stmt_vec_info stmt_vinfo;
6089 struct loop *loop = NULL;
6090
6091 if (loop_vinfo)
6092 loop = LOOP_VINFO_LOOP (loop_vinfo);
6093
6094 *def_stmt = NULL;
6095 *def = NULL_TREE;
6096
6097 if (vect_print_dump_info (REPORT_DETAILS))
6098 {
6099 fprintf (vect_dump, "vect_is_simple_use: operand ");
6100 print_generic_expr (vect_dump, operand, TDF_SLIM);
6101 }
6102
6103 if (CONSTANT_CLASS_P (operand))
6104 {
6105 *dt = vect_constant_def;
6106 return true;
6107 }
6108
6109 if (is_gimple_min_invariant (operand))
6110 {
6111 *def = operand;
6112 *dt = vect_external_def;
6113 return true;
6114 }
6115
6116 if (TREE_CODE (operand) == PAREN_EXPR)
6117 {
6118 if (vect_print_dump_info (REPORT_DETAILS))
6119 fprintf (vect_dump, "non-associatable copy.");
6120 operand = TREE_OPERAND (operand, 0);
6121 }
6122
6123 if (TREE_CODE (operand) != SSA_NAME)
6124 {
6125 if (vect_print_dump_info (REPORT_DETAILS))
6126 fprintf (vect_dump, "not ssa-name.");
6127 return false;
6128 }
6129
6130 *def_stmt = SSA_NAME_DEF_STMT (operand);
6131 if (*def_stmt == NULL)
6132 {
6133 if (vect_print_dump_info (REPORT_DETAILS))
6134 fprintf (vect_dump, "no def_stmt.");
6135 return false;
6136 }
6137
6138 if (vect_print_dump_info (REPORT_DETAILS))
6139 {
6140 fprintf (vect_dump, "def_stmt: ");
6141 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
6142 }
6143
6144 /* Empty stmt is expected only in case of a function argument.
6145 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6146 if (gimple_nop_p (*def_stmt))
6147 {
6148 *def = operand;
6149 *dt = vect_external_def;
6150 return true;
6151 }
6152
6153 bb = gimple_bb (*def_stmt);
6154
6155 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6156 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6157 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6158 *dt = vect_external_def;
6159 else
6160 {
6161 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6162 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6163 }
6164
6165 if (*dt == vect_unknown_def_type
6166 || (stmt
6167 && *dt == vect_double_reduction_def
6168 && gimple_code (stmt) != GIMPLE_PHI))
6169 {
6170 if (vect_print_dump_info (REPORT_DETAILS))
6171 fprintf (vect_dump, "Unsupported pattern.");
6172 return false;
6173 }
6174
6175 if (vect_print_dump_info (REPORT_DETAILS))
6176 fprintf (vect_dump, "type of def: %d.",*dt);
6177
6178 switch (gimple_code (*def_stmt))
6179 {
6180 case GIMPLE_PHI:
6181 *def = gimple_phi_result (*def_stmt);
6182 break;
6183
6184 case GIMPLE_ASSIGN:
6185 *def = gimple_assign_lhs (*def_stmt);
6186 break;
6187
6188 case GIMPLE_CALL:
6189 *def = gimple_call_lhs (*def_stmt);
6190 if (*def != NULL)
6191 break;
6192 /* FALLTHRU */
6193 default:
6194 if (vect_print_dump_info (REPORT_DETAILS))
6195 fprintf (vect_dump, "unsupported defining stmt: ");
6196 return false;
6197 }
6198
6199 return true;
6200 }
6201
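/* Illustrative classification (hypothetical loop, not taken from this file):
   given

     for (i = 0; i < n; i++)
       a[i] = b[i] * k + 5;

   the constant 5 is classified as vect_constant_def, the loop-invariant K
   (defined before the loop, or a function argument) as vect_external_def,
   and the SSA name holding b[i] * k, defined by a statement of the current
   iteration, as vect_internal_def.  */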
6202 /* Function vect_is_simple_use_1.
6203
6204 Same as vect_is_simple_use but also determines the vector operand
6205 type of OPERAND and stores it to *VECTYPE. If the definition of
6206 OPERAND is vect_uninitialized_def, vect_constant_def or
6207 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6208 is responsible for computing the best suited vector type for the
6209 scalar operand. */
6210
6211 bool
6212 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6213 bb_vec_info bb_vinfo, gimple *def_stmt,
6214 tree *def, enum vect_def_type *dt, tree *vectype)
6215 {
6216 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6217 def, dt))
6218 return false;
6219
6220 /* Now get a vector type if the def is internal, otherwise supply
6221 NULL_TREE and leave it up to the caller to figure out a proper
6222 type for the use stmt. */
6223 if (*dt == vect_internal_def
6224 || *dt == vect_induction_def
6225 || *dt == vect_reduction_def
6226 || *dt == vect_double_reduction_def
6227 || *dt == vect_nested_cycle)
6228 {
6229 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6230
6231 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6232 && !STMT_VINFO_RELEVANT (stmt_info)
6233 && !STMT_VINFO_LIVE_P (stmt_info))
6234 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6235
6236 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6237 gcc_assert (*vectype != NULL_TREE);
6238 }
6239 else if (*dt == vect_uninitialized_def
6240 || *dt == vect_constant_def
6241 || *dt == vect_external_def)
6242 *vectype = NULL_TREE;
6243 else
6244 gcc_unreachable ();
6245
6246 return true;
6247 }
6248
6249
6250 /* Function supportable_widening_operation
6251
6252 Check whether an operation represented by the code CODE is a
6253 widening operation that is supported by the target platform in
6254 vector form (i.e., when operating on arguments of type VECTYPE_IN
6255 producing a result of type VECTYPE_OUT).
6256
6257 Widening operations we currently support are NOP (CONVERT), FLOAT
6258 and WIDEN_MULT. This function checks if these operations are supported
6259 by the target platform either directly (via vector tree-codes), or via
6260 target builtins.
6261
6262 Output:
6263 - CODE1 and CODE2 are codes of vector operations to be used when
6264 vectorizing the operation, if available.
6265 - DECL1 and DECL2 are decls of target builtin functions to be used
6266 when vectorizing the operation, if available. In this case,
6267 CODE1 and CODE2 are CALL_EXPR.
6268 - MULTI_STEP_CVT determines the number of required intermediate steps in
6269 case of multi-step conversion (like char->short->int - in that case
6270 MULTI_STEP_CVT will be 1).
6271 - INTERM_TYPES contains the intermediate type required to perform the
6272 widening operation (short in the above example). */
6273
6274 bool
6275 supportable_widening_operation (enum tree_code code, gimple stmt,
6276 tree vectype_out, tree vectype_in,
6277 tree *decl1 ATTRIBUTE_UNUSED,
6278 tree *decl2 ATTRIBUTE_UNUSED,
6279 enum tree_code *code1, enum tree_code *code2,
6280 int *multi_step_cvt,
6281 VEC (tree, heap) **interm_types)
6282 {
6283 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6284 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6285 struct loop *vect_loop = NULL;
6286 enum machine_mode vec_mode;
6287 enum insn_code icode1, icode2;
6288 optab optab1, optab2;
6289 tree vectype = vectype_in;
6290 tree wide_vectype = vectype_out;
6291 enum tree_code c1, c2;
6292 int i;
6293 tree prev_type, intermediate_type;
6294 enum machine_mode intermediate_mode, prev_mode;
6295 optab optab3, optab4;
6296
6297 *multi_step_cvt = 0;
6298 if (loop_info)
6299 vect_loop = LOOP_VINFO_LOOP (loop_info);
6300
6301 switch (code)
6302 {
6303 case WIDEN_MULT_EXPR:
6304 /* The result of a vectorized widening operation usually requires
6305 two vectors (because the widened results do not fit into one vector).
6306 The generated vector results would normally be expected to be
6307 generated in the same order as in the original scalar computation,
6308 i.e. if 8 results are generated in each vector iteration, they are
6309 to be organized as follows:
6310 vect1: [res1,res2,res3,res4],
6311 vect2: [res5,res6,res7,res8].
6312
6313 However, in the special case that the result of the widening
6314 operation is used in a reduction computation only, the order doesn't
6315 matter (because when vectorizing a reduction we change the order of
6316 the computation). Some targets can take advantage of this and
6317 generate more efficient code. For example, targets like Altivec,
6318 that support widen_mult using a sequence of {mult_even,mult_odd}
6319 generate the following vectors:
6320 vect1: [res1,res3,res5,res7],
6321 vect2: [res2,res4,res6,res8].
6322
6323 When vectorizing outer-loops, we execute the inner-loop sequentially
6324 (each vectorized inner-loop iteration contributes to VF outer-loop
6325 iterations in parallel). We therefore don't allow changing the
6326 order of the computation in the inner-loop during outer-loop
6327 vectorization. */
6328 /* TODO: Another case in which order doesn't *really* matter is when we
6329 widen and then contract again, e.g. (short)((int)x * y >> 8).
6330 Normally, pack_trunc performs an even/odd permute, whereas the
6331 repack from an even/odd expansion would be an interleave, which
6332 would be significantly simpler for e.g. AVX2. */
6333 /* In any case, in order to avoid duplicating the code below, recurse
6334 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6335 are properly set up for the caller. If we fail, we'll continue with
6336 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6337 if (vect_loop
6338 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6339 && !nested_in_vect_loop_p (vect_loop, stmt)
6340 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6341 stmt, vectype_out, vectype_in,
6342 NULL, NULL, code1, code2,
6343 multi_step_cvt, interm_types))
6344 return true;
6345 c1 = VEC_WIDEN_MULT_LO_EXPR;
6346 c2 = VEC_WIDEN_MULT_HI_EXPR;
6347 break;
6348
6349 case VEC_WIDEN_MULT_EVEN_EXPR:
6350 /* Support the recursion induced just above. */
6351 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6352 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6353 break;
6354
6355 case WIDEN_LSHIFT_EXPR:
6356 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6357 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6358 break;
6359
6360 CASE_CONVERT:
6361 c1 = VEC_UNPACK_LO_EXPR;
6362 c2 = VEC_UNPACK_HI_EXPR;
6363 break;
6364
6365 case FLOAT_EXPR:
6366 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6367 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6368 break;
6369
6370 case FIX_TRUNC_EXPR:
6371 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6372 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6373 computing the operation. */
6374 return false;
6375
6376 default:
6377 gcc_unreachable ();
6378 }
6379
6380 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6381 {
6382 enum tree_code ctmp = c1;
6383 c1 = c2;
6384 c2 = ctmp;
6385 }
6386
6387 if (code == FIX_TRUNC_EXPR)
6388 {
6389 /* The signedness is determined from the output operand. */
6390 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6391 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6392 }
6393 else
6394 {
6395 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6396 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6397 }
6398
6399 if (!optab1 || !optab2)
6400 return false;
6401
6402 vec_mode = TYPE_MODE (vectype);
6403 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6404 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6405 return false;
6406
6407 *code1 = c1;
6408 *code2 = c2;
6409
6410 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6411 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6412 return true;
6413
6414 /* Check if it's a multi-step conversion that can be done using intermediate
6415 types. */
6416
6417 prev_type = vectype;
6418 prev_mode = vec_mode;
6419
6420 if (!CONVERT_EXPR_CODE_P (code))
6421 return false;
6422
6423 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6424 intermediate steps in the promotion sequence. We try
6425 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6426 not. */
6427 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6428 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6429 {
6430 intermediate_mode = insn_data[icode1].operand[0].mode;
6431 intermediate_type
6432 = lang_hooks.types.type_for_mode (intermediate_mode,
6433 TYPE_UNSIGNED (prev_type));
6434 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6435 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6436
6437 if (!optab3 || !optab4
6438 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6439 || insn_data[icode1].operand[0].mode != intermediate_mode
6440 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6441 || insn_data[icode2].operand[0].mode != intermediate_mode
6442 || ((icode1 = optab_handler (optab3, intermediate_mode))
6443 == CODE_FOR_nothing)
6444 || ((icode2 = optab_handler (optab4, intermediate_mode))
6445 == CODE_FOR_nothing))
6446 break;
6447
6448 VEC_quick_push (tree, *interm_types, intermediate_type);
6449 (*multi_step_cvt)++;
6450
6451 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6452 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6453 return true;
6454
6455 prev_type = intermediate_type;
6456 prev_mode = intermediate_mode;
6457 }
6458
6459 VEC_free (tree, heap, *interm_types);
6460 return false;
6461 }
6462
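/* Worked example (illustrative only): widening a vector of 16 chars to ints
   when the target only provides single-step unpacks.  The V16QI input is
   first unpacked into two vectors of 8 shorts (the intermediate type pushed
   onto *INTERM_TYPES), and each of those is unpacked into two vectors of
   4 ints, giving four result vectors in total.  For this char->short->int
   sequence *MULTI_STEP_CVT is 1.  On targets that use the even/odd variants
   for WIDEN_MULT in a reduction, the results come back reordered as
   vect1: [res1,res3,res5,res7] and vect2: [res2,res4,res6,res8], which is
   fine because the reduction is insensitive to the order.  */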
6463
6464 /* Function supportable_narrowing_operation
6465
6466 Check whether an operation represented by the code CODE is a
6467 narrowing operation that is supported by the target platform in
6468 vector form (i.e., when operating on arguments of type VECTYPE_IN
6469 and producing a result of type VECTYPE_OUT).
6470
6471 Narrowing operations we currently support are NOP (CONVERT) and
6472 FIX_TRUNC. This function checks if these operations are supported by
6473 the target platform directly via vector tree-codes.
6474
6475 Output:
6476 - CODE1 is the code of a vector operation to be used when
6477 vectorizing the operation, if available.
6478 - MULTI_STEP_CVT determines the number of required intermediate steps in
6479 case of multi-step conversion (like int->short->char - in that case
6480 MULTI_STEP_CVT will be 1).
6481 - INTERM_TYPES contains the intermediate type required to perform the
6482 narrowing operation (short in the above example). */
6483
6484 bool
6485 supportable_narrowing_operation (enum tree_code code,
6486 tree vectype_out, tree vectype_in,
6487 enum tree_code *code1, int *multi_step_cvt,
6488 VEC (tree, heap) **interm_types)
6489 {
6490 enum machine_mode vec_mode;
6491 enum insn_code icode1;
6492 optab optab1, interm_optab;
6493 tree vectype = vectype_in;
6494 tree narrow_vectype = vectype_out;
6495 enum tree_code c1;
6496 tree intermediate_type;
6497 enum machine_mode intermediate_mode, prev_mode;
6498 int i;
6499 bool uns;
6500
6501 *multi_step_cvt = 0;
6502 switch (code)
6503 {
6504 CASE_CONVERT:
6505 c1 = VEC_PACK_TRUNC_EXPR;
6506 break;
6507
6508 case FIX_TRUNC_EXPR:
6509 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6510 break;
6511
6512 case FLOAT_EXPR:
6513 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6514 tree code and optabs used for computing the operation. */
6515 return false;
6516
6517 default:
6518 gcc_unreachable ();
6519 }
6520
6521 if (code == FIX_TRUNC_EXPR)
6522 /* The signedness is determined from the output operand. */
6523 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6524 else
6525 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6526
6527 if (!optab1)
6528 return false;
6529
6530 vec_mode = TYPE_MODE (vectype);
6531 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6532 return false;
6533
6534 *code1 = c1;
6535
6536 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6537 return true;
6538
6539 /* Check if it's a multi-step conversion that can be done using intermediate
6540 types. */
6541 prev_mode = vec_mode;
6542 if (code == FIX_TRUNC_EXPR)
6543 uns = TYPE_UNSIGNED (vectype_out);
6544 else
6545 uns = TYPE_UNSIGNED (vectype);
6546
6547 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6548 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6549 costly than signed. */
6550 if (code == FIX_TRUNC_EXPR && uns)
6551 {
6552 enum insn_code icode2;
6553
6554 intermediate_type
6555 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6556 interm_optab
6557 = optab_for_tree_code (c1, intermediate_type, optab_default);
6558 if (interm_optab != NULL
6559 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6560 && insn_data[icode1].operand[0].mode
6561 == insn_data[icode2].operand[0].mode)
6562 {
6563 uns = false;
6564 optab1 = interm_optab;
6565 icode1 = icode2;
6566 }
6567 }
6568
6569 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6570 intermediate steps in the narrowing sequence. We try
6571 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6572 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6573 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6574 {
6575 intermediate_mode = insn_data[icode1].operand[0].mode;
6576 intermediate_type
6577 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6578 interm_optab
6579 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6580 optab_default);
6581 if (!interm_optab
6582 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6583 || insn_data[icode1].operand[0].mode != intermediate_mode
6584 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6585 == CODE_FOR_nothing))
6586 break;
6587
6588 VEC_quick_push (tree, *interm_types, intermediate_type);
6589 (*multi_step_cvt)++;
6590
6591 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6592 return true;
6593
6594 prev_mode = intermediate_mode;
6595 optab1 = interm_optab;
6596 }
6597
6598 VEC_free (tree, heap, *interm_types);
6599 return false;
6600 }
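/* Worked example (illustrative only): narrowing vectors of ints to chars when
   the target only provides single-step packs.  Four V4SI inputs are packed
   pairwise with VEC_PACK_TRUNC_EXPR into two vectors of 8 shorts (the
   intermediate type recorded in *INTERM_TYPES), and those two are packed into
   a single V16QI result.  For this int->short->char sequence *MULTI_STEP_CVT
   is 1; a direct int->char pack, when available, needs no intermediate types
   and leaves *MULTI_STEP_CVT at 0.  */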