1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
55
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
58
59 /* Return the vectorized type for the given statement. */
60
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
63 {
64 return STMT_VINFO_VECTYPE (stmt_info);
65 }
66
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71 {
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
76
77 if (!loop_vinfo)
78 return false;
79
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
81
82 return (bb->loop_father == loop->inner);
83 }
84
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
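/* Note that loads and stores of gather/scatter accesses are re-classified
   below as vector_gather_load / vector_scatter_store so that the target
   cost model can price them separately from contiguous accesses.  */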
88
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
93 {
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
100
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
103
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
107 }
108
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
110
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
113 {
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
116 }
117
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
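/* The statement emitted here has the form <vect> = ARRAY[N], where <vect>
   is a fresh SSA name derived from SCALAR_DEST (the exact name is chosen
   by vect_create_destination_var).  */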
122
123 static tree
124 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
125 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
126 {
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
129
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
136
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
141
142 return vect_name;
143 }
144
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
148
149 static void
150 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
151 tree vect, tree array, unsigned HOST_WIDE_INT n)
152 {
153 tree array_ref;
154 gimple *new_stmt;
155
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
159
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
162 }
163
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
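/* The result is a MEM_REF of TYPE at offset zero from PTR; ALIAS_PTR_TYPE
   supplies the alias information, and PTR is recorded as aligned to TYPE's
   alignment.  */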
167
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
170 {
171 tree mem_ref;
172
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
177 }
178
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
181
182 static void
183 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
184 tree var)
185 {
186 tree clobber = build_clobber (TREE_TYPE (var));
187 gimple *new_stmt = gimple_build_assign (var, clobber);
188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
189 }
190
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192
193 /* Function vect_mark_relevant.
194
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
196
197 static void
198 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
199 enum vect_relevant relevant, bool live_p)
200 {
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
203
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE, vect_location,
206 "mark relevant %d, live %d: %G", relevant, live_p,
207 stmt_info->stmt);
208
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern; in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
214 {
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
219
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info = stmt_info;
225 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
227 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
229 }
230
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
234
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237 {
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
242 }
243
244 worklist->safe_push (stmt_info);
245 }
246
247
248 /* Function is_simple_and_all_uses_invariant
249
250 Return true if STMT_INFO is simple and all uses of it are invariant. */
251
252 bool
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
254 loop_vec_info loop_vinfo)
255 {
256 tree op;
257 ssa_op_iter iter;
258
259 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
260 if (!stmt)
261 return false;
262
263 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
264 {
265 enum vect_def_type dt = vect_uninitialized_def;
266
267 if (!vect_is_simple_use (op, loop_vinfo, &dt))
268 {
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
271 "use not simple.\n");
272 return false;
273 }
274
275 if (dt != vect_external_def && dt != vect_constant_def)
276 return false;
277 }
278 return true;
279 }
280
281 /* Function vect_stmt_relevant_p.
282
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
285
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - it is a control stmt in the loop (other than the loop exit condition).
290
291 CHECKME: what other side effects would the vectorizer allow? */
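/* On return, *RELEVANT holds the computed vect_relevant classification and
   *LIVE_P is true if a def of STMT_INFO is used outside the loop (such uses
   are expected only in the loop-closed exit PHIs).  */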
292
293 static bool
294 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
295 enum vect_relevant *relevant, bool *live_p)
296 {
297 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
298 ssa_op_iter op_iter;
299 imm_use_iterator imm_iter;
300 use_operand_p use_p;
301 def_operand_p def_p;
302
303 *relevant = vect_unused_in_scope;
304 *live_p = false;
305
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info->stmt)
308 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
310
311 /* changing memory. */
312 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt_info->stmt)
314 && !gimple_clobber_p (stmt_info->stmt))
315 {
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
320 }
321
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
324 {
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326 {
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
329 {
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
333
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
336
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form).  */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
341
342 *live_p = true;
343 }
344 }
345 }
346
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
349 {
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
354 }
355
356 return (*live_p || *relevant);
357 }
358
359
360 /* Function exist_non_indexing_operands_for_use_p
361
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
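/* For internal calls, the mask operand, the stored-value operand and the
   offset operand of gathers/scatters count as non-indexing uses.  */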
364
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
367 {
368 tree operand;
369
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info))
374 return true;
375
376 /* STMT has a data_ref. FORNOW this means that it's one of
377 the following forms:
378 -1- ARRAY_REF = var
379 -2- var = ARRAY_REF
380 (This should have been verified in analyze_data_refs).
381
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
384 for array indexing.
385
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
388
389 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
390 if (!assign || !gimple_assign_copy_p (assign))
391 {
392 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
393 if (call && gimple_call_internal_p (call))
394 {
395 internal_fn ifn = gimple_call_internal_fn (call);
396 int mask_index = internal_fn_mask_index (ifn);
397 if (mask_index >= 0
398 && use == gimple_call_arg (call, mask_index))
399 return true;
400 int stored_value_index = internal_fn_stored_value_index (ifn);
401 if (stored_value_index >= 0
402 && use == gimple_call_arg (call, stored_value_index))
403 return true;
404 if (internal_gather_scatter_fn_p (ifn)
405 && use == gimple_call_arg (call, 1))
406 return true;
407 }
408 return false;
409 }
410
411 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
412 return false;
413 operand = gimple_assign_rhs1 (assign);
414 if (TREE_CODE (operand) != SSA_NAME)
415 return false;
416
417 if (operand == use)
418 return true;
419
420 return false;
421 }
422
423
424 /*
425 Function process_use.
426
427 Inputs:
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433 be performed.
434
435 Outputs:
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
440 Exceptions:
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
445 we skip DEF_STMT because it has already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
448
449 Return opt_result::success () if everything is as expected, otherwise a failure result explaining why. */
450
451 static opt_result
452 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
453 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
454 bool force)
455 {
456 stmt_vec_info dstmt_vinfo;
457 basic_block bb, def_bb;
458 enum vect_def_type dt;
459
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
463 return opt_result::success ();
464
465 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
466 return opt_result::failure_at (stmt_vinfo->stmt,
467 "not vectorized:"
468 " unsupported use in stmt.\n");
469
470 if (!dstmt_vinfo)
471 return opt_result::success ();
472
473 def_bb = gimple_bb (dstmt_vinfo->stmt);
474
475 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
476 DSTMT_VINFO must have already been processed, because this should be the
477 only way that STMT, which is a reduction-phi, was put in the worklist,
478 as there should be no other uses for DSTMT_VINFO in the loop. So we just
479 check that everything is as expected, and we are done. */
480 bb = gimple_bb (stmt_vinfo->stmt);
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
486 {
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
491 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
492 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
493 return opt_result::success ();
494 }
495
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
504 {
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
508
509 switch (relevant)
510 {
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
515
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
520
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
525
526 case vect_used_in_scope:
527 break;
528
529 default:
530 gcc_unreachable ();
531 }
532 }
533
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
536 ...
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
542 {
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
546
547 switch (relevant)
548 {
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
554
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
559
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
563
564 default:
565 gcc_unreachable ();
566 }
567 }
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
578 {
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
583 }
584
585
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
588 }
589
590
591 /* Function vect_mark_stmts_to_be_vectorized.
592
593 Not all stmts in the loop need to be vectorized. For example:
594
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
599
600 3. j = j + 1
601
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
604
605 This pass detects such stmts. */
606
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
609 {
610 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
618
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
620
621 auto_vec<stmt_vec_info, 64> worklist;
622
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
625 {
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
628 {
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
633
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
636 }
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
638 {
639 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location,
642 "init: stmt relevant? %G", stmt_info->stmt);
643
644 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
645 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
646 }
647 }
648
649 /* 2. Process_worklist */
650 while (worklist.length () > 0)
651 {
652 use_operand_p use_p;
653 ssa_op_iter iter;
654
655 stmt_vec_info stmt_vinfo = worklist.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location,
658 "worklist: examine stmt: %G", stmt_vinfo->stmt);
659
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
662 of STMT. */
663 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
664
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
667
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
675
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
677 {
678 case vect_reduction_def:
679 gcc_assert (relevant != vect_unused_in_scope);
680 if (relevant != vect_unused_in_scope
681 && relevant != vect_used_in_scope
682 && relevant != vect_used_by_reduction
683 && relevant != vect_used_only_live)
684 return opt_result::failure_at
685 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
686 break;
687
688 case vect_nested_cycle:
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_outer_by_reduction
691 && relevant != vect_used_in_outer)
692 return opt_result::failure_at
693 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
694 break;
695
696 case vect_double_reduction_def:
697 if (relevant != vect_unused_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
700 return opt_result::failure_at
701 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
702 break;
703
704 default:
705 break;
706 }
707
708 if (is_pattern_stmt_p (stmt_vinfo))
709 {
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
714 {
715 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
716 tree op = gimple_assign_rhs1 (assign);
717
718 i = 1;
719 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
720 {
721 opt_result res
722 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
723 loop_vinfo, relevant, &worklist, false);
724 if (!res)
725 return res;
726 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
727 loop_vinfo, relevant, &worklist, false);
728 if (!res)
729 return res;
730 i = 2;
731 }
732 for (; i < gimple_num_ops (assign); i++)
733 {
734 op = gimple_op (assign, i);
735 if (TREE_CODE (op) == SSA_NAME)
736 {
737 opt_result res
738 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
739 &worklist, false);
740 if (!res)
741 return res;
742 }
743 }
744 }
745 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
746 {
747 for (i = 0; i < gimple_call_num_args (call); i++)
748 {
749 tree arg = gimple_call_arg (call, i);
750 opt_result res
751 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
752 &worklist, false);
753 if (!res)
754 return res;
755 }
756 }
757 }
758 else
759 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
760 {
761 tree op = USE_FROM_PTR (use_p);
762 opt_result res
763 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
764 &worklist, false);
765 if (!res)
766 return res;
767 }
768
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
770 {
771 gather_scatter_info gs_info;
772 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
773 gcc_unreachable ();
774 opt_result res
775 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
776 &worklist, true);
777 if (!res)
778 return res;
779 }
780 } /* while worklist */
781
782 return opt_result::success ();
783 }
784
785 /* Compute the prologue cost for invariant or constant operands. */
786
787 static unsigned
788 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
789 unsigned opno, enum vect_def_type dt,
790 stmt_vector_for_cost *cost_vec)
791 {
792 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
793 tree op = gimple_op (stmt, opno);
794 unsigned prologue_cost = 0;
795
796 /* Without looking at the actual initializer a vector of
797 constants can be implemented as a load from the constant pool.
798 When all elements are the same we can use a splat. */
799 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
800 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
801 unsigned num_vects_to_check;
802 unsigned HOST_WIDE_INT const_nunits;
803 unsigned nelt_limit;
804 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
805 && ! multiple_p (const_nunits, group_size))
806 {
807 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
808 nelt_limit = const_nunits;
809 }
810 else
811 {
812 /* If either the vector has variable length or the vectors
813 are composed of repeated whole groups, we only need to
814 cost construction once. All vectors will be the same. */
815 num_vects_to_check = 1;
816 nelt_limit = group_size;
817 }
818 tree elt = NULL_TREE;
819 unsigned nelt = 0;
820 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
821 {
822 unsigned si = j % group_size;
823 if (nelt == 0)
824 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
825 /* ??? We're just tracking whether all operands of a single
826 vector initializer are the same, ideally we'd check if
827 we emitted the same one already. */
828 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
829 opno))
830 elt = NULL_TREE;
831 nelt++;
832 if (nelt == nelt_limit)
833 {
834 /* ??? We need to pass down stmt_info for a vector type
835 even if it points to the wrong stmt. */
836 prologue_cost += record_stmt_cost
837 (cost_vec, 1,
838 dt == vect_external_def
839 ? (elt ? scalar_to_vec : vec_construct)
840 : vector_load,
841 stmt_info, 0, vect_prologue);
842 nelt = 0;
843 }
844 }
845
846 return prologue_cost;
847 }
848
849 /* Function vect_model_simple_cost.
850
851 Models cost for simple operations, i.e. those that only emit ncopies of a
852 single op. Right now, this does not account for multiple insns that could
853 be generated for the single vector op. We will handle that shortly. */
854
855 static void
856 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
857 enum vect_def_type *dt,
858 int ndts,
859 slp_tree node,
860 stmt_vector_for_cost *cost_vec)
861 {
862 int inside_cost = 0, prologue_cost = 0;
863
864 gcc_assert (cost_vec != NULL);
865
866 /* ??? Somehow we need to fix this at the callers. */
867 if (node)
868 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
869
870 if (node)
871 {
872 /* Scan operands and account for prologue cost of constants/externals.
873 ??? This over-estimates cost for multiple uses and should be
874 re-engineered. */
875 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
876 tree lhs = gimple_get_lhs (stmt);
877 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
878 {
879 tree op = gimple_op (stmt, i);
880 enum vect_def_type dt;
881 if (!op || op == lhs)
882 continue;
883 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
884 && (dt == vect_constant_def || dt == vect_external_def))
885 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
886 i, dt, cost_vec);
887 }
888 }
889 else
890 /* Cost the "broadcast" of a scalar operand into a vector operand.
891 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
892 cost model. */
893 for (int i = 0; i < ndts; i++)
894 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
895 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
896 stmt_info, 0, vect_prologue);
897
898 /* Adjust for two-operator SLP nodes. */
899 if (node && SLP_TREE_TWO_OPERATORS (node))
900 {
901 ncopies *= 2;
902 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
903 stmt_info, 0, vect_body);
904 }
905
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
908 stmt_info, 0, vect_body);
909
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
914 }
915
916
917 /* Model cost for type demotion and promotion operations. PWR is normally
918 zero for single-step promotions and demotions. It will be one if
919 two-step promotion/demotion is required, and so on. Each additional
920 step doubles the number of instructions required. */
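/* For example, with PWR == 1 the loop below records vect_pow2 (1)
   + vect_pow2 (2) = 6 vec_promote_demote operations for a two-step
   promotion, or vect_pow2 (0) + vect_pow2 (1) = 3 for a demotion.  */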
921
922 static void
923 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
924 enum vect_def_type *dt, int pwr,
925 stmt_vector_for_cost *cost_vec)
926 {
927 int i, tmp;
928 int inside_cost = 0, prologue_cost = 0;
929
930 for (i = 0; i < pwr + 1; i++)
931 {
932 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
933 (i + 1) : i;
934 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
935 vec_promote_demote, stmt_info, 0,
936 vect_body);
937 }
938
939 /* FORNOW: Assuming maximum 2 args per stmts. */
940 for (i = 0; i < 2; i++)
941 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
942 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
943 stmt_info, 0, vect_prologue);
944
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost, prologue_cost);
949 }
950
951 /* Function vect_model_store_cost
952
953 Models cost for stores. In the case of grouped accesses, one access
954 has the overhead of the grouped access attributed to it. */
955
956 static void
957 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
958 enum vect_def_type dt,
959 vect_memory_access_type memory_access_type,
960 vec_load_store_type vls_type, slp_tree slp_node,
961 stmt_vector_for_cost *cost_vec)
962 {
963 unsigned int inside_cost = 0, prologue_cost = 0;
964 stmt_vec_info first_stmt_info = stmt_info;
965 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
966
967 /* ??? Somehow we need to fix this at the callers. */
968 if (slp_node)
969 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
970
971 if (vls_type == VLS_STORE_INVARIANT)
972 {
973 if (slp_node)
974 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
975 1, dt, cost_vec);
976 else
977 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
978 stmt_info, 0, vect_prologue);
979 }
980
981 /* Grouped stores update all elements in the group at once,
982 so we want the DR for the first statement. */
983 if (!slp_node && grouped_access_p)
984 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
985
986 /* True if we should include any once-per-group costs as well as
987 the cost of the statement itself. For SLP we only get called
988 once per group anyhow. */
989 bool first_stmt_p = (first_stmt_info == stmt_info);
990
991 /* We assume that the cost of a single store-lanes instruction is
992 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
993 access is instead being provided by a permute-and-store operation,
994 include the cost of the permutes. */
995 if (first_stmt_p
996 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
997 {
998 /* Uses high and low interleave or shuffle operations for each
999 needed permute. */
1000 int group_size = DR_GROUP_SIZE (first_stmt_info);
1001 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
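/* For instance, a group of 4 vector stores needs ceil_log2 (4) * 4 = 8
   permute statements per copy.  */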
1002 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1003 stmt_info, 0, vect_body);
1004
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1008 group_size);
1009 }
1010
1011 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1012 /* Costs of the stores. */
1013 if (memory_access_type == VMAT_ELEMENTWISE
1014 || memory_access_type == VMAT_GATHER_SCATTER)
1015 {
1016 /* N scalar stores plus extracting the elements. */
1017 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1018 inside_cost += record_stmt_cost (cost_vec,
1019 ncopies * assumed_nunits,
1020 scalar_store, stmt_info, 0, vect_body);
1021 }
1022 else
1023 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1024
1025 if (memory_access_type == VMAT_ELEMENTWISE
1026 || memory_access_type == VMAT_STRIDED_SLP)
1027 {
1028 /* Extracting the scalar elements from the vectors to be stored. */
1029 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1030 inside_cost += record_stmt_cost (cost_vec,
1031 ncopies * assumed_nunits,
1032 vec_to_scalar, stmt_info, 0, vect_body);
1033 }
1034
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_store_cost: inside_cost = %d, "
1038 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1039 }
1040
1041
1042 /* Calculate cost of DR's memory access. */
1043 void
1044 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1045 unsigned int *inside_cost,
1046 stmt_vector_for_cost *body_cost_vec)
1047 {
1048 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1049 int alignment_support_scheme
1050 = vect_supportable_dr_alignment (dr_info, false);
1051
1052 switch (alignment_support_scheme)
1053 {
1054 case dr_aligned:
1055 {
1056 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1057 vector_store, stmt_info, 0,
1058 vect_body);
1059
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE, vect_location,
1062 "vect_model_store_cost: aligned.\n");
1063 break;
1064 }
1065
1066 case dr_unaligned_supported:
1067 {
1068 /* Here, we assign an additional cost for the unaligned store. */
1069 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1070 unaligned_store, stmt_info,
1071 DR_MISALIGNMENT (dr_info),
1072 vect_body);
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE, vect_location,
1075 "vect_model_store_cost: unaligned supported by "
1076 "hardware.\n");
1077 break;
1078 }
1079
1080 case dr_unaligned_unsupported:
1081 {
1082 *inside_cost = VECT_MAX_COST;
1083
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1086 "vect_model_store_cost: unsupported access.\n");
1087 break;
1088 }
1089
1090 default:
1091 gcc_unreachable ();
1092 }
1093 }
1094
1095
1096 /* Function vect_model_load_cost
1097
1098 Models cost for loads. In the case of grouped accesses, one access has
1099 the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1102
1103 static void
1104 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1105 vect_memory_access_type memory_access_type,
1106 slp_instance instance,
1107 slp_tree slp_node,
1108 stmt_vector_for_cost *cost_vec)
1109 {
1110 unsigned int inside_cost = 0, prologue_cost = 0;
1111 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1112
1113 gcc_assert (cost_vec);
1114
1115 /* ??? Somehow we need to fix this at the callers. */
1116 if (slp_node)
1117 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1118
1119 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1120 {
1121 /* If the load is permuted then the alignment is determined by
1122 the first group element, not by the first scalar stmt DR. */
1123 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1124 /* Record the cost for the permutation. */
1125 unsigned n_perms;
1126 unsigned assumed_nunits
1127 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1128 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1129 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1130 slp_vf, instance, true,
1131 &n_perms);
1132 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1133 first_stmt_info, 0, vect_body);
1134 /* And adjust the number of loads performed. This handles
1135 redundancies as well as loads that are later dead. */
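/* E.g. with DR_GROUP_SIZE == 8 and nunits == 4, a permutation that only
   uses elements 0..3 leaves the second vector load uncounted.  */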
1136 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1137 bitmap_clear (perm);
1138 for (unsigned i = 0;
1139 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1140 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1141 ncopies = 0;
1142 bool load_seen = false;
1143 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1144 {
1145 if (i % assumed_nunits == 0)
1146 {
1147 if (load_seen)
1148 ncopies++;
1149 load_seen = false;
1150 }
1151 if (bitmap_bit_p (perm, i))
1152 load_seen = true;
1153 }
1154 if (load_seen)
1155 ncopies++;
1156 gcc_assert (ncopies
1157 <= (DR_GROUP_SIZE (first_stmt_info)
1158 - DR_GROUP_GAP (first_stmt_info)
1159 + assumed_nunits - 1) / assumed_nunits);
1160 }
1161
1162 /* Grouped loads read all elements in the group at once,
1163 so we want the DR for the first statement. */
1164 stmt_vec_info first_stmt_info = stmt_info;
1165 if (!slp_node && grouped_access_p)
1166 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1167
1168 /* True if we should include any once-per-group costs as well as
1169 the cost of the statement itself. For SLP we only get called
1170 once per group anyhow. */
1171 bool first_stmt_p = (first_stmt_info == stmt_info);
1172
1173 /* We assume that the cost of a single load-lanes instruction is
1174 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1175 access is instead being provided by a load-and-permute operation,
1176 include the cost of the permutes. */
1177 if (first_stmt_p
1178 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1179 {
1180 /* Uses even and odd extract operations or shuffle operations
1181 for each needed permute. */
1182 int group_size = DR_GROUP_SIZE (first_stmt_info);
1183 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1184 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1185 stmt_info, 0, vect_body);
1186
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE, vect_location,
1189 "vect_model_load_cost: strided group_size = %d .\n",
1190 group_size);
1191 }
1192
1193 /* The loads themselves. */
1194 if (memory_access_type == VMAT_ELEMENTWISE
1195 || memory_access_type == VMAT_GATHER_SCATTER)
1196 {
1197 /* N scalar loads plus gathering them into a vector. */
1198 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1199 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1200 inside_cost += record_stmt_cost (cost_vec,
1201 ncopies * assumed_nunits,
1202 scalar_load, stmt_info, 0, vect_body);
1203 }
1204 else
1205 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1206 &inside_cost, &prologue_cost,
1207 cost_vec, cost_vec, true);
1208 if (memory_access_type == VMAT_ELEMENTWISE
1209 || memory_access_type == VMAT_STRIDED_SLP)
1210 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1211 stmt_info, 0, vect_body);
1212
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: inside_cost = %d, "
1216 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1217 }
1218
1219
1220 /* Calculate cost of DR's memory access. */
1221 void
1222 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1223 bool add_realign_cost, unsigned int *inside_cost,
1224 unsigned int *prologue_cost,
1225 stmt_vector_for_cost *prologue_cost_vec,
1226 stmt_vector_for_cost *body_cost_vec,
1227 bool record_prologue_costs)
1228 {
1229 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1230 int alignment_support_scheme
1231 = vect_supportable_dr_alignment (dr_info, false);
1232
1233 switch (alignment_support_scheme)
1234 {
1235 case dr_aligned:
1236 {
1237 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1238 stmt_info, 0, vect_body);
1239
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE, vect_location,
1242 "vect_model_load_cost: aligned.\n");
1243
1244 break;
1245 }
1246 case dr_unaligned_supported:
1247 {
1248 /* Here, we assign an additional cost for the unaligned load. */
1249 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1250 unaligned_load, stmt_info,
1251 DR_MISALIGNMENT (dr_info),
1252 vect_body);
1253
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1258
1259 break;
1260 }
1261 case dr_explicit_realign:
1262 {
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1267
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1274
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1278
1279 break;
1280 }
1281 case dr_explicit_realign_optimized:
1282 {
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1287
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1294
1295 if (add_realign_cost && record_prologue_costs)
1296 {
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1304 }
1305
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1310
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1315
1316 break;
1317 }
1318
1319 case dr_unaligned_unsupported:
1320 {
1321 *inside_cost = VECT_MAX_COST;
1322
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1327 }
1328
1329 default:
1330 gcc_unreachable ();
1331 }
1332 }
1333
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1336
1337 static void
1338 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1340 {
1341 if (gsi)
1342 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1343 else
1344 {
1345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1346
1347 if (loop_vinfo)
1348 {
1349 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1350 basic_block new_bb;
1351 edge pe;
1352
1353 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1354 loop = loop->inner;
1355
1356 pe = loop_preheader_edge (loop);
1357 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1358 gcc_assert (!new_bb);
1359 }
1360 else
1361 {
1362 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1363 basic_block bb;
1364 gimple_stmt_iterator gsi_bb_start;
1365
1366 gcc_assert (bb_vinfo);
1367 bb = BB_VINFO_BB (bb_vinfo);
1368 gsi_bb_start = gsi_after_labels (bb);
1369 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1370 }
1371 }
1372
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_NOTE, vect_location,
1375 "created new init_stmt: %G", new_stmt);
1376 }
1377
1378 /* Function vect_init_vector.
1379
1380 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1381 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1382 vector type a vector with all elements equal to VAL is created first.
1383 Place the initialization at BSI if it is not NULL. Otherwise, place the
1384 initialization at the loop preheader.
1385 Return the DEF of INIT_STMT.
1386 It will be used in the vectorization of STMT_INFO. */
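/* E.g. for a V4SI TYPE and a scalar VAL of 5 this emits, roughly,
   cst_N = { 5, 5, 5, 5 }; and returns cst_N (the SSA name shown is
   illustrative; the prefix comes from vect_get_new_ssa_name).  */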
1387
1388 tree
1389 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1390 gimple_stmt_iterator *gsi)
1391 {
1392 gimple *init_stmt;
1393 tree new_temp;
1394
1395 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1396 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1397 {
1398 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1399 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1400 {
1401 /* A scalar boolean value should be transformed into an
1402 all-zeros or all-ones value before building a vector. */
1403 if (VECTOR_BOOLEAN_TYPE_P (type))
1404 {
1405 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1406 tree false_val = build_zero_cst (TREE_TYPE (type));
1407
1408 if (CONSTANT_CLASS_P (val))
1409 val = integer_zerop (val) ? false_val : true_val;
1410 else
1411 {
1412 new_temp = make_ssa_name (TREE_TYPE (type));
1413 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1414 val, true_val, false_val);
1415 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1416 val = new_temp;
1417 }
1418 }
1419 else if (CONSTANT_CLASS_P (val))
1420 val = fold_convert (TREE_TYPE (type), val);
1421 else
1422 {
1423 new_temp = make_ssa_name (TREE_TYPE (type));
1424 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1425 init_stmt = gimple_build_assign (new_temp,
1426 fold_build1 (VIEW_CONVERT_EXPR,
1427 TREE_TYPE (type),
1428 val));
1429 else
1430 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1431 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1432 val = new_temp;
1433 }
1434 }
1435 val = build_vector_from_val (type, val);
1436 }
1437
1438 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1439 init_stmt = gimple_build_assign (new_temp, val);
1440 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1441 return new_temp;
1442 }
1443
1444 /* Function vect_get_vec_def_for_operand_1.
1445
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
1448
1449 tree
1450 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1451 enum vect_def_type dt)
1452 {
1453 tree vec_oprnd;
1454 stmt_vec_info vec_stmt_info;
1455
1456 switch (dt)
1457 {
1458 /* operand is a constant or a loop invariant. */
1459 case vect_constant_def:
1460 case vect_external_def:
1461 /* Code should use vect_get_vec_def_for_operand. */
1462 gcc_unreachable ();
1463
1464 /* Operand is defined by a loop header phi. In case of nested
1465 cycles we may also have uses of the backedge def. */
1466 case vect_reduction_def:
1467 case vect_double_reduction_def:
1468 case vect_nested_cycle:
1469 case vect_induction_def:
1470 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1471 || dt == vect_nested_cycle);
1472 /* Fallthru. */
1473
1474 /* operand is defined inside the loop. */
1475 case vect_internal_def:
1476 {
1477 /* Get the def from the vectorized stmt. */
1478 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1479 /* Get vectorized pattern statement. */
1480 if (!vec_stmt_info
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info))
1483 vec_stmt_info = (STMT_VINFO_VEC_STMT
1484 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1485 gcc_assert (vec_stmt_info);
1486 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1487 vec_oprnd = PHI_RESULT (phi);
1488 else
1489 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1490 return vec_oprnd;
1491 }
1492
1493 default:
1494 gcc_unreachable ();
1495 }
1496 }
1497
1498
1499 /* Function vect_get_vec_def_for_operand.
1500
1501 OP is an operand in STMT_VINFO. This function returns a (vector) def
1502 that will be used in the vectorized stmt for STMT_VINFO.
1503
1504 In the case that OP is an SSA_NAME which is defined in the loop, then
1505 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1506
1507 In case OP is an invariant or constant, a new stmt that creates a vector def
1508 needs to be introduced. VECTYPE may be used to specify a required type for
1509 vector invariant. */
1510
1511 tree
1512 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1513 {
1514 gimple *def_stmt;
1515 enum vect_def_type dt;
1516 bool is_simple_use;
1517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1518
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location,
1521 "vect_get_vec_def_for_operand: %T\n", op);
1522
1523 stmt_vec_info def_stmt_info;
1524 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1525 &def_stmt_info, &def_stmt);
1526 gcc_assert (is_simple_use);
1527 if (def_stmt && dump_enabled_p ())
1528 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1529
1530 if (dt == vect_constant_def || dt == vect_external_def)
1531 {
1532 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1533 tree vector_type;
1534
1535 if (vectype)
1536 vector_type = vectype;
1537 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1538 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1539 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1540 else
1541 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1542
1543 gcc_assert (vector_type);
1544 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1545 }
1546 else
1547 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1548 }
1549
1550
1551 /* Function vect_get_vec_def_for_stmt_copy
1552
1553 Return a vector-def for an operand. This function is used when the
1554 vectorized stmt to be created (by the caller to this function) is a "copy"
1555 created in case the vectorized result cannot fit in one vector, and several
1556 copies of the vector-stmt are required. In this case the vector-def is
1557 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1558 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1559
1560 Context:
1561 In case the vectorization factor (VF) is bigger than the number
1562 of elements that can fit in a vectype (nunits), we have to generate
1563 more than one vector stmt to vectorize the scalar stmt. This situation
1564 arises when there are multiple data-types operated upon in the loop; the
1565 smallest data-type determines the VF, and as a result, when vectorizing
1566 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1567 vector stmt (each computing a vector of 'nunits' results, and together
1568 computing 'VF' results in each iteration). This function is called when
1569 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1570 which VF=16 and nunits=4, so the number of copies required is 4):
1571
1572 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1573
1574 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1575 VS1.1: vx.1 = memref1 VS1.2
1576 VS1.2: vx.2 = memref2 VS1.3
1577 VS1.3: vx.3 = memref3
1578
1579 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1580 VSnew.1: vz1 = vx.1 + ... VSnew.2
1581 VSnew.2: vz2 = vx.2 + ... VSnew.3
1582 VSnew.3: vz3 = vx.3 + ...
1583
1584 The vectorization of S1 is explained in vectorizable_load.
1585 The vectorization of S2:
1586 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1587 the function 'vect_get_vec_def_for_operand' is called to
1588 get the relevant vector-def for each operand of S2. For operand x it
1589 returns the vector-def 'vx.0'.
1590
1591 To create the remaining copies of the vector-stmt (VSnew.j), this
1592 function is called to get the relevant vector-def for each operand. It is
1593 obtained from the respective VS1.j stmt, which is recorded in the
1594 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1595
1596 For example, to obtain the vector-def 'vx.1' in order to create the
1597 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1598 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1599 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1600 and return its def ('vx.1').
1601 Overall, to create the above sequence this function will be called 3 times:
1602 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1603 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1604 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1605
1606 tree
1607 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1608 {
1609 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1610 if (!def_stmt_info)
1611 /* Do nothing; can reuse same def. */
1612 return vec_oprnd;
1613
1614 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1615 gcc_assert (def_stmt_info);
1616 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1617 vec_oprnd = PHI_RESULT (phi);
1618 else
1619 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1620 return vec_oprnd;
1621 }
1622
1623
1624 /* Get vectorized definitions for the operands to create a copy of an original
1625 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1626
1627 void
1628 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1629 vec<tree> *vec_oprnds0,
1630 vec<tree> *vec_oprnds1)
1631 {
1632 tree vec_oprnd = vec_oprnds0->pop ();
1633
1634 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1635 vec_oprnds0->quick_push (vec_oprnd);
1636
1637 if (vec_oprnds1 && vec_oprnds1->length ())
1638 {
1639 vec_oprnd = vec_oprnds1->pop ();
1640 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1641 vec_oprnds1->quick_push (vec_oprnd);
1642 }
1643 }
1644
1645
1646 /* Get vectorized definitions for OP0 and OP1. */
1647
1648 void
1649 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1650 vec<tree> *vec_oprnds0,
1651 vec<tree> *vec_oprnds1,
1652 slp_tree slp_node)
1653 {
1654 if (slp_node)
1655 {
1656 int nops = (op1 == NULL_TREE) ? 1 : 2;
1657 auto_vec<tree> ops (nops);
1658 auto_vec<vec<tree> > vec_defs (nops);
1659
1660 ops.quick_push (op0);
1661 if (op1)
1662 ops.quick_push (op1);
1663
1664 vect_get_slp_defs (ops, slp_node, &vec_defs);
1665
1666 *vec_oprnds0 = vec_defs[0];
1667 if (op1)
1668 *vec_oprnds1 = vec_defs[1];
1669 }
1670 else
1671 {
1672 tree vec_oprnd;
1673
1674 vec_oprnds0->create (1);
1675 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1676 vec_oprnds0->quick_push (vec_oprnd);
1677
1678 if (op1)
1679 {
1680 vec_oprnds1->create (1);
1681 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1682 vec_oprnds1->quick_push (vec_oprnd);
1683 }
1684 }
1685 }
1686
1687 /* Helper function called by vect_finish_replace_stmt and
1688 vect_finish_stmt_generation. Set the location of the new
1689 statement and create and return a stmt_vec_info for it. */
1690
1691 static stmt_vec_info
1692 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1693 {
1694 vec_info *vinfo = stmt_info->vinfo;
1695
1696 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1697
1698 if (dump_enabled_p ())
1699 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1700
1701 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1702
1703 /* While EH edges will generally prevent vectorization, stmt might
1704 e.g. be in a must-not-throw region. Ensure newly created stmts
1705 that could throw are part of the same region. */
1706 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1707 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1708 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1709
1710 return vec_stmt_info;
1711 }
1712
1713 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1714 which sets the same scalar result as STMT_INFO did. Create and return a
1715 stmt_vec_info for VEC_STMT. */
1716
1717 stmt_vec_info
1718 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1719 {
1720 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1721
1722 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1723 gsi_replace (&gsi, vec_stmt, true);
1724
1725 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1726 }
1727
1728 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1729 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1730
1731 stmt_vec_info
1732 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1733 gimple_stmt_iterator *gsi)
1734 {
1735 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1736
1737 if (!gsi_end_p (*gsi)
1738 && gimple_has_mem_ops (vec_stmt))
1739 {
1740 gimple *at_stmt = gsi_stmt (*gsi);
1741 tree vuse = gimple_vuse (at_stmt);
1742 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1743 {
1744 tree vdef = gimple_vdef (at_stmt);
1745 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1746 /* If we have an SSA vuse and insert a store, update virtual
1747 SSA form to avoid triggering the renamer. Do so only
1748 if we can easily see all uses - which is what almost always
1749 happens with the way vectorized stmts are inserted. */
1750 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1751 && ((is_gimple_assign (vec_stmt)
1752 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1753 || (is_gimple_call (vec_stmt)
1754 && !(gimple_call_flags (vec_stmt)
1755 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1756 {
1757 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1758 gimple_set_vdef (vec_stmt, new_vdef);
1759 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1760 }
1761 }
1762 }
1763 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1764 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1765 }
1766
1767 /* We want to vectorize a call to combined function CFN with function
1768 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1769 as the types of all inputs. Check whether this is possible using
1770 an internal function, returning its code if so or IFN_LAST if not. */
1771
1772 static internal_fn
1773 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1774 tree vectype_out, tree vectype_in)
1775 {
1776 internal_fn ifn;
1777 if (internal_fn_p (cfn))
1778 ifn = as_internal_fn (cfn);
1779 else
1780 ifn = associated_internal_fn (fndecl);
1781 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1782 {
1783 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1784 if (info.vectorizable)
1785 {
1786 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1787 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1788 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1789 OPTIMIZE_FOR_SPEED))
1790 return ifn;
1791 }
1792 }
1793 return IFN_LAST;
1794 }
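/* A hedged example of the mapping above (whether the pattern exists depends
   on the target; the types are illustrative placeholders): for a call that
   folds to CFN_SQRT with VECTYPE_OUT == VECTYPE_IN == a V2DF type,

     ifn = vectorizable_internal_function (CFN_SQRT, NULL_TREE,
                                           v2df_type, v2df_type);

   returns IFN_SQRT if direct_internal_fn_supported_p reports a V2DF sqrt
   pattern, and IFN_LAST otherwise, in which case the caller falls back to
   asking for a target-specific builtin.  */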
1795
1796
1797 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1798 gimple_stmt_iterator *);
1799
1800 /* Check whether a load or store statement in the loop described by
1801 LOOP_VINFO is possible in a fully-masked loop. This is testing
1802 whether the vectorizer pass has the appropriate support, as well as
1803 whether the target does.
1804
1805 VLS_TYPE says whether the statement is a load or store and VECTYPE
1806 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1807 says how the load or store is going to be implemented and GROUP_SIZE
1808 is the number of load or store statements in the containing group.
1809 If the access is a gather load or scatter store, GS_INFO describes
1810 its arguments.
1811
1812 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1813 supported, otherwise record the required mask types. */
1814
1815 static void
1816 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1817 vec_load_store_type vls_type, int group_size,
1818 vect_memory_access_type memory_access_type,
1819 gather_scatter_info *gs_info)
1820 {
1821 /* Invariant loads need no special support. */
1822 if (memory_access_type == VMAT_INVARIANT)
1823 return;
1824
1825 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1826 machine_mode vecmode = TYPE_MODE (vectype);
1827 bool is_load = (vls_type == VLS_LOAD);
1828 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1829 {
1830 if (is_load
1831 ? !vect_load_lanes_supported (vectype, group_size, true)
1832 : !vect_store_lanes_supported (vectype, group_size, true))
1833 {
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "can't use a fully-masked loop because the"
1837 " target doesn't have an appropriate masked"
1838 " load/store-lanes instruction.\n");
1839 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1840 return;
1841 }
1842 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1843 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1844 return;
1845 }
1846
1847 if (memory_access_type == VMAT_GATHER_SCATTER)
1848 {
1849 internal_fn ifn = (is_load
1850 ? IFN_MASK_GATHER_LOAD
1851 : IFN_MASK_SCATTER_STORE);
1852 tree offset_type = TREE_TYPE (gs_info->offset);
1853 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1854 gs_info->memory_type,
1855 TYPE_SIGN (offset_type),
1856 gs_info->scale))
1857 {
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "can't use a fully-masked loop because the"
1861 " target doesn't have an appropriate masked"
1862 " gather load or scatter store instruction.\n");
1863 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1864 return;
1865 }
1866 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1867 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1868 return;
1869 }
1870
1871 if (memory_access_type != VMAT_CONTIGUOUS
1872 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1873 {
1874 /* Element X of the data must come from iteration i * VF + X of the
1875 scalar loop. We need more work to support other mappings. */
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "can't use a fully-masked loop because an access"
1879 " isn't contiguous.\n");
1880 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1881 return;
1882 }
1883
1884 machine_mode mask_mode;
1885 if (!(targetm.vectorize.get_mask_mode
1886 (GET_MODE_NUNITS (vecmode),
1887 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1888 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1889 {
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "can't use a fully-masked loop because the target"
1893 " doesn't have the appropriate masked load or"
1894 " store.\n");
1895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1896 return;
1897 }
1898 /* We might load more scalars than we need for permuting SLP loads.
1899 We checked in get_group_load_store_type that the extra elements
1900 don't leak into a new vector. */
1901 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1902 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1903 unsigned int nvectors;
1904 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1905 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1906 else
1907 gcc_unreachable ();
1908 }
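/* A worked example of the mask bookkeeping above (numbers chosen purely for
   illustration): with GROUP_SIZE == 3, a vectorization factor of 4 and
   NUNITS == 8, each vector-loop iteration covers 3 * 4 == 12 scalar
   elements, so can_div_away_from_zero_p gives NVECTORS == 2 (12 divided by
   8, rounded away from zero) and two masks of type VECTYPE are recorded in
   LOOP_VINFO_MASKS.  */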
1909
1910 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1911 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1912 that needs to be applied to all loads and stores in a vectorized loop.
1913 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1914
1915 MASK_TYPE is the type of both masks. If new statements are needed,
1916 insert them before GSI. */
1917
1918 static tree
1919 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1920 gimple_stmt_iterator *gsi)
1921 {
1922 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1923 if (!loop_mask)
1924 return vec_mask;
1925
1926 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1927 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1928 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1929 vec_mask, loop_mask);
1930 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1931 return and_res;
1932 }
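/* For example (the SSA names are illustrative), with VEC_MASK mask__5 and
   LOOP_MASK loop_mask_8 the helper above inserts

     vec_mask_and_9 = mask__5 & loop_mask_8;

   before GSI and returns vec_mask_and_9; with a null LOOP_MASK it simply
   returns mask__5 unchanged.  */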
1933
1934 /* Determine whether we can use a gather load or scatter store to vectorize
1935 strided load or store STMT_INFO by truncating the current offset to a
1936 smaller width. We need to be able to construct an offset vector:
1937
1938 { 0, X, X*2, X*3, ... }
1939
1940 without loss of precision, where X is STMT_INFO's DR_STEP.
1941
1942 Return true if this is possible, describing the gather load or scatter
1943 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1944
1945 static bool
1946 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1947 loop_vec_info loop_vinfo, bool masked_p,
1948 gather_scatter_info *gs_info)
1949 {
1950 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1951 data_reference *dr = dr_info->dr;
1952 tree step = DR_STEP (dr);
1953 if (TREE_CODE (step) != INTEGER_CST)
1954 {
1955 /* ??? Perhaps we could use range information here? */
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "cannot truncate variable step.\n");
1959 return false;
1960 }
1961
1962 /* Get the number of bits in an element. */
1963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1964 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1965 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1966
1967 /* Set COUNT to the upper limit on the number of elements - 1.
1968 Start with the maximum vectorization factor. */
1969 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1970
1971 /* Try lowering COUNT to the number of scalar latch iterations. */
1972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1973 widest_int max_iters;
1974 if (max_loop_iterations (loop, &max_iters)
1975 && max_iters < count)
1976 count = max_iters.to_shwi ();
1977
1978 /* Try scales of 1 and the element size. */
1979 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1980 wi::overflow_type overflow = wi::OVF_NONE;
1981 for (int i = 0; i < 2; ++i)
1982 {
1983 int scale = scales[i];
1984 widest_int factor;
1985 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1986 continue;
1987
1988 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1989 in OFFSET_BITS bits. */
1990 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1991 if (overflow)
1992 continue;
1993 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1994 if (wi::min_precision (range, sign) > element_bits)
1995 {
1996 overflow = wi::OVF_UNKNOWN;
1997 continue;
1998 }
1999
2000 /* See whether the target supports the operation. */
2001 tree memory_type = TREE_TYPE (DR_REF (dr));
2002 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2003 memory_type, element_bits, sign, scale,
2004 &gs_info->ifn, &gs_info->element_type))
2005 continue;
2006
2007 tree offset_type = build_nonstandard_integer_type (element_bits,
2008 sign == UNSIGNED);
2009
2010 gs_info->decl = NULL_TREE;
2011 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2012 but we don't need to store that here. */
2013 gs_info->base = NULL_TREE;
2014 gs_info->offset = fold_convert (offset_type, step);
2015 gs_info->offset_dt = vect_constant_def;
2016 gs_info->offset_vectype = NULL_TREE;
2017 gs_info->scale = scale;
2018 gs_info->memory_type = memory_type;
2019 return true;
2020 }
2021
2022 if (overflow && dump_enabled_p ())
2023 dump_printf_loc (MSG_NOTE, vect_location,
2024 "truncating gather/scatter offset to %d bits"
2025 " might change its value.\n", element_bits);
2026
2027 return false;
2028 }
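/* A worked example of the precision check above (values illustrative): with
   32-bit elements (ELEMENT_BITS == 32), DR_STEP == 516 and at most
   COUNT == 200 further latch iterations, the SCALE == 1 attempt gives
   RANGE == 200 * 516 == 103200, which needs far fewer than 32 bits, so the
   offset vector { 0, 516, 1032, ... } can be expressed in 32-bit elements
   provided the target supports the matching gather or scatter internal
   function.  */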
2029
2030 /* Return true if we can use gather/scatter internal functions to
2031 vectorize STMT_INFO, which is a grouped or strided load or store.
2032 MASKED_P is true if the load or store is conditional. When returning
2033 true, fill in GS_INFO with the information required to perform the
2034 operation. */
2035
2036 static bool
2037 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2038 loop_vec_info loop_vinfo, bool masked_p,
2039 gather_scatter_info *gs_info)
2040 {
2041 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2042 || gs_info->decl)
2043 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2044 masked_p, gs_info);
2045
2046 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2047 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2048 tree offset_type = TREE_TYPE (gs_info->offset);
2049 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2050
2051 /* Enforced by vect_check_gather_scatter. */
2052 gcc_assert (element_bits >= offset_bits);
2053
2054 /* If the elements are wider than the offset, convert the offset to the
2055 same width, without changing its sign. */
2056 if (element_bits > offset_bits)
2057 {
2058 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2059 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2060 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2061 }
2062
2063 if (dump_enabled_p ())
2064 dump_printf_loc (MSG_NOTE, vect_location,
2065 "using gather/scatter for strided/grouped access,"
2066 " scale = %d\n", gs_info->scale);
2067
2068 return true;
2069 }
2070
2071 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2072 elements with a known constant step. Return -1 if that step
2073 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2074
2075 static int
2076 compare_step_with_zero (stmt_vec_info stmt_info)
2077 {
2078 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2079 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2080 size_zero_node);
2081 }
2082
2083 /* If the target supports a permute mask that reverses the elements in
2084 a vector of type VECTYPE, return that mask, otherwise return null. */
2085
2086 static tree
2087 perm_mask_for_reverse (tree vectype)
2088 {
2089 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2090
2091 /* The encoding has a single stepped pattern. */
2092 vec_perm_builder sel (nunits, 1, 3);
2093 for (int i = 0; i < 3; ++i)
2094 sel.quick_push (nunits - 1 - i);
2095
2096 vec_perm_indices indices (sel, 1, nunits);
2097 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2098 return NULL_TREE;
2099 return vect_gen_perm_mask_checked (vectype, indices);
2100 }
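/* For example, for an 8-element vector the selector built above is
   { 7, 6, 5, 4, 3, 2, 1, 0 }; only the leading stepped pattern { 7, 6, 5 }
   is encoded explicitly, so the same builder also works when NUNITS is not
   a compile-time constant.  */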
2101
2102 /* STMT_INFO is either a masked or unconditional store. Return the value
2103 being stored. */
2104
2105 tree
2106 vect_get_store_rhs (stmt_vec_info stmt_info)
2107 {
2108 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2109 {
2110 gcc_assert (gimple_assign_single_p (assign));
2111 return gimple_assign_rhs1 (assign);
2112 }
2113 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2114 {
2115 internal_fn ifn = gimple_call_internal_fn (call);
2116 int index = internal_fn_stored_value_index (ifn);
2117 gcc_assert (index >= 0);
2118 return gimple_call_arg (call, index);
2119 }
2120 gcc_unreachable ();
2121 }
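/* For example (SSA names illustrative), for a scalar store written as

     *p_7 = x_3;

   the function returns x_3, and for a masked store emitted as an internal
   call such as

     .MASK_STORE (p_7, align, mask__5, x_3);

   it returns the stored-value argument that internal_fn_stored_value_index
   identifies (again x_3 here).  */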
2122
2123 /* A subroutine of get_load_store_type, with a subset of the same
2124 arguments. Handle the case where STMT_INFO is part of a grouped load
2125 or store.
2126
2127 For stores, the statements in the group are all consecutive
2128 and there is no gap at the end. For loads, the statements in the
2129 group might not be consecutive; there can be gaps between statements
2130 as well as at the end. */
2131
2132 static bool
2133 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2134 bool masked_p, vec_load_store_type vls_type,
2135 vect_memory_access_type *memory_access_type,
2136 gather_scatter_info *gs_info)
2137 {
2138 vec_info *vinfo = stmt_info->vinfo;
2139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2140 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2148
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2152
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2159
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2163
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2166
2167 if (slp)
2168 {
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2170 {
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits, group_size))
2175 *memory_access_type = VMAT_STRIDED_SLP;
2176 else
2177 *memory_access_type = VMAT_ELEMENTWISE;
2178 }
2179 else
2180 {
2181 overrun_p = loop_vinfo && gap != 0;
2182 if (overrun_p && vls_type != VLS_LOAD)
2183 {
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2187 return false;
2188 }
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
2193 if (overrun_p
2194 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2195 / vect_get_scalar_dr_size (first_dr_info)))
2196 overrun_p = false;
2197 if (overrun_p && !can_overrun_p)
2198 {
2199 if (dump_enabled_p ())
2200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2201 "Peeling for outer loop is not supported\n");
2202 return false;
2203 }
2204 *memory_access_type = VMAT_CONTIGUOUS;
2205 }
2206 }
2207 else
2208 {
2209 /* We can always handle this case using elementwise accesses,
2210 but see if something more efficient is available. */
2211 *memory_access_type = VMAT_ELEMENTWISE;
2212
2213 /* If there is a gap at the end of the group then these optimizations
2214 would access excess elements in the last iteration. */
2215 bool would_overrun_p = (gap != 0);
2216 /* An overrun is fine if the trailing elements are smaller than the
2217 alignment boundary B. Every vector access will be a multiple of B
2218 and so we are guaranteed to access a non-gap element in the
2219 same B-sized block. */
2220 if (would_overrun_p
2221 && !masked_p
2222 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2223 / vect_get_scalar_dr_size (first_dr_info)))
2224 would_overrun_p = false;
2225
2226 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2227 && (can_overrun_p || !would_overrun_p)
2228 && compare_step_with_zero (stmt_info) > 0)
2229 {
2230 /* First cope with the degenerate case of a single-element
2231 vector. */
2232 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2233 *memory_access_type = VMAT_CONTIGUOUS;
2234
2235 /* Otherwise try using LOAD/STORE_LANES. */
2236 if (*memory_access_type == VMAT_ELEMENTWISE
2237 && (vls_type == VLS_LOAD
2238 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2239 : vect_store_lanes_supported (vectype, group_size,
2240 masked_p)))
2241 {
2242 *memory_access_type = VMAT_LOAD_STORE_LANES;
2243 overrun_p = would_overrun_p;
2244 }
2245
2246 /* If that fails, try using permuting loads. */
2247 if (*memory_access_type == VMAT_ELEMENTWISE
2248 && (vls_type == VLS_LOAD
2249 ? vect_grouped_load_supported (vectype, single_element_p,
2250 group_size)
2251 : vect_grouped_store_supported (vectype, group_size)))
2252 {
2253 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2254 overrun_p = would_overrun_p;
2255 }
2256 }
2257
2258 /* As a last resort, try using a gather load or scatter store.
2259
2260 ??? Although the code can handle all group sizes correctly,
2261 it probably isn't a win to use separate strided accesses based
2262 on nearby locations. Or, even if it's a win over scalar code,
2263 it might not be a win over vectorizing at a lower VF, if that
2264 allows us to use contiguous accesses. */
2265 if (*memory_access_type == VMAT_ELEMENTWISE
2266 && single_element_p
2267 && loop_vinfo
2268 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2269 masked_p, gs_info))
2270 *memory_access_type = VMAT_GATHER_SCATTER;
2271 }
2272
2273 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2274 {
2275 /* STMT_INFO is the leader of the group. Check the operands of all the
2276 stmts of the group. */
2277 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2278 while (next_stmt_info)
2279 {
2280 tree op = vect_get_store_rhs (next_stmt_info);
2281 enum vect_def_type dt;
2282 if (!vect_is_simple_use (op, vinfo, &dt))
2283 {
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "use not simple.\n");
2287 return false;
2288 }
2289 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2290 }
2291 }
2292
2293 if (overrun_p)
2294 {
2295 gcc_assert (can_overrun_p);
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "Data access with gaps requires scalar "
2299 "epilogue loop\n");
2300 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2301 }
2302
2303 return true;
2304 }
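/* A worked example of the overrun reasoning above (numbers illustrative):
   for a group of 4-byte loads with a trailing DR_GROUP_GAP of 3 elements
   and a known alignment of 16 bytes, 3 < 16 / 4 == 4, so every vector
   access is guaranteed to touch a non-gap element inside the same 16-byte
   block and no gap peeling (LOOP_VINFO_PEELING_FOR_GAPS) is needed; a gap
   of 4 or more would instead require the scalar epilogue described
   above.  */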
2305
2306 /* A subroutine of get_load_store_type, with a subset of the same
2307 arguments. Handle the case where STMT_INFO is a load or store that
2308 accesses consecutive elements with a negative step. */
2309
2310 static vect_memory_access_type
2311 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2312 vec_load_store_type vls_type,
2313 unsigned int ncopies)
2314 {
2315 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2316 dr_alignment_support alignment_support_scheme;
2317
2318 if (ncopies > 1)
2319 {
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "multiple types with negative step.\n");
2323 return VMAT_ELEMENTWISE;
2324 }
2325
2326 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2327 if (alignment_support_scheme != dr_aligned
2328 && alignment_support_scheme != dr_unaligned_supported)
2329 {
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2332 "negative step but alignment required.\n");
2333 return VMAT_ELEMENTWISE;
2334 }
2335
2336 if (vls_type == VLS_STORE_INVARIANT)
2337 {
2338 if (dump_enabled_p ())
2339 dump_printf_loc (MSG_NOTE, vect_location,
2340 "negative step with invariant source;"
2341 " no permute needed.\n");
2342 return VMAT_CONTIGUOUS_DOWN;
2343 }
2344
2345 if (!perm_mask_for_reverse (vectype))
2346 {
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "negative step and reversing not supported.\n");
2350 return VMAT_ELEMENTWISE;
2351 }
2352
2353 return VMAT_CONTIGUOUS_REVERSE;
2354 }
2355
2356 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2357 if there is a memory access type that the vectorized form can use,
2358 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2359 or scatters, fill in GS_INFO accordingly.
2360
2361 SLP says whether we're performing SLP rather than loop vectorization.
2362 MASKED_P is true if the statement is conditional on a vectorized mask.
2363 VECTYPE is the vector type that the vectorized statements will use.
2364 NCOPIES is the number of vector statements that will be needed. */
2365
2366 static bool
2367 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2368 bool masked_p, vec_load_store_type vls_type,
2369 unsigned int ncopies,
2370 vect_memory_access_type *memory_access_type,
2371 gather_scatter_info *gs_info)
2372 {
2373 vec_info *vinfo = stmt_info->vinfo;
2374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2375 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2376 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2377 {
2378 *memory_access_type = VMAT_GATHER_SCATTER;
2379 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2380 gcc_unreachable ();
2381 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2382 &gs_info->offset_dt,
2383 &gs_info->offset_vectype))
2384 {
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2387 "%s index use not simple.\n",
2388 vls_type == VLS_LOAD ? "gather" : "scatter");
2389 return false;
2390 }
2391 }
2392 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2393 {
2394 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2395 vls_type, memory_access_type, gs_info))
2396 return false;
2397 }
2398 else if (STMT_VINFO_STRIDED_P (stmt_info))
2399 {
2400 gcc_assert (!slp);
2401 if (loop_vinfo
2402 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2403 masked_p, gs_info))
2404 *memory_access_type = VMAT_GATHER_SCATTER;
2405 else
2406 *memory_access_type = VMAT_ELEMENTWISE;
2407 }
2408 else
2409 {
2410 int cmp = compare_step_with_zero (stmt_info);
2411 if (cmp < 0)
2412 *memory_access_type = get_negative_load_store_type
2413 (stmt_info, vectype, vls_type, ncopies);
2414 else if (cmp == 0)
2415 {
2416 gcc_assert (vls_type == VLS_LOAD);
2417 *memory_access_type = VMAT_INVARIANT;
2418 }
2419 else
2420 *memory_access_type = VMAT_CONTIGUOUS;
2421 }
2422
2423 if ((*memory_access_type == VMAT_ELEMENTWISE
2424 || *memory_access_type == VMAT_STRIDED_SLP)
2425 && !nunits.is_constant ())
2426 {
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "Not using elementwise accesses due to variable "
2430 "vectorization factor.\n");
2431 return false;
2432 }
2433
2434 /* FIXME: At the moment the cost model seems to underestimate the
2435 cost of using elementwise accesses. This check preserves the
2436 traditional behavior until that can be fixed. */
2437 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2438 if (!first_stmt_info)
2439 first_stmt_info = stmt_info;
2440 if (*memory_access_type == VMAT_ELEMENTWISE
2441 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2442 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2443 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2444 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2445 {
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "not falling back to elementwise accesses\n");
2449 return false;
2450 }
2451 return true;
2452 }
2453
2454 /* Return true if boolean argument MASK is suitable for vectorizing
2455 conditional load or store STMT_INFO. When returning true, store the type
2456 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2457 in *MASK_VECTYPE_OUT. */
2458
2459 static bool
2460 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2461 vect_def_type *mask_dt_out,
2462 tree *mask_vectype_out)
2463 {
2464 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2465 {
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2468 "mask argument is not a boolean.\n");
2469 return false;
2470 }
2471
2472 if (TREE_CODE (mask) != SSA_NAME)
2473 {
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "mask argument is not an SSA name.\n");
2477 return false;
2478 }
2479
2480 enum vect_def_type mask_dt;
2481 tree mask_vectype;
2482 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2483 {
2484 if (dump_enabled_p ())
2485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2486 "mask use not simple.\n");
2487 return false;
2488 }
2489
2490 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2491 if (!mask_vectype)
2492 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2493
2494 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2495 {
2496 if (dump_enabled_p ())
2497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2498 "could not find an appropriate vector mask type.\n");
2499 return false;
2500 }
2501
2502 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2503 TYPE_VECTOR_SUBPARTS (vectype)))
2504 {
2505 if (dump_enabled_p ())
2506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2507 "vector mask type %T"
2508 " does not match vector data type %T.\n",
2509 mask_vectype, vectype);
2510
2511 return false;
2512 }
2513
2514 *mask_dt_out = mask_dt;
2515 *mask_vectype_out = mask_vectype;
2516 return true;
2517 }
2518
2519 /* Return true if stored value RHS is suitable for vectorizing store
2520 statement STMT_INFO. When returning true, store the type of the
2521 definition in *RHS_DT_OUT, the type of the vectorized store value in
2522 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2523
2524 static bool
2525 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2526 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2527 vec_load_store_type *vls_type_out)
2528 {
2529 /* If this is a store from a constant, make sure
2530 native_encode_expr can handle it. */
2531 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2532 {
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2535 "cannot encode constant as a byte sequence.\n");
2536 return false;
2537 }
2538
2539 enum vect_def_type rhs_dt;
2540 tree rhs_vectype;
2541 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2542 {
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "use not simple.\n");
2546 return false;
2547 }
2548
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2551 {
2552 if (dump_enabled_p ())
2553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2554 "incompatible vector types.\n");
2555 return false;
2556 }
2557
2558 *rhs_dt_out = rhs_dt;
2559 *rhs_vectype_out = rhs_vectype;
2560 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2561 *vls_type_out = VLS_STORE_INVARIANT;
2562 else
2563 *vls_type_out = VLS_STORE;
2564 return true;
2565 }
2566
2567 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2568 Note that we support masks with floating-point type, in which case the
2569 floats are interpreted as a bitmask. */
2570
2571 static tree
2572 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2573 {
2574 if (TREE_CODE (masktype) == INTEGER_TYPE)
2575 return build_int_cst (masktype, -1);
2576 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2577 {
2578 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2579 mask = build_vector_from_val (masktype, mask);
2580 return vect_init_vector (stmt_info, mask, masktype, NULL);
2581 }
2582 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2583 {
2584 REAL_VALUE_TYPE r;
2585 long tmp[6];
2586 for (int j = 0; j < 6; ++j)
2587 tmp[j] = -1;
2588 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2589 tree mask = build_real (TREE_TYPE (masktype), r);
2590 mask = build_vector_from_val (masktype, mask);
2591 return vect_init_vector (stmt_info, mask, masktype, NULL);
2592 }
2593 gcc_unreachable ();
2594 }
2595
2596 /* Build an all-zero merge value of type VECTYPE while vectorizing
2597 STMT_INFO as a gather load. */
2598
2599 static tree
2600 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2601 {
2602 tree merge;
2603 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2604 merge = build_int_cst (TREE_TYPE (vectype), 0);
2605 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2606 {
2607 REAL_VALUE_TYPE r;
2608 long tmp[6];
2609 for (int j = 0; j < 6; ++j)
2610 tmp[j] = 0;
2611 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2612 merge = build_real (TREE_TYPE (vectype), r);
2613 }
2614 else
2615 gcc_unreachable ();
2616 merge = build_vector_from_val (vectype, merge);
2617 return vect_init_vector (stmt_info, merge, vectype, NULL);
2618 }
2619
2620 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2621 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2622 the gather load operation. If the load is conditional, MASK is the
2623 unvectorized condition and MASK_DT is its definition type, otherwise
2624 MASK is null. */
2625
2626 static void
2627 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2628 gimple_stmt_iterator *gsi,
2629 stmt_vec_info *vec_stmt,
2630 gather_scatter_info *gs_info,
2631 tree mask)
2632 {
2633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2634 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2635 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2636 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2637 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2638 edge pe = loop_preheader_edge (loop);
2639 enum { NARROW, NONE, WIDEN } modifier;
2640 poly_uint64 gather_off_nunits
2641 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2642
2643 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2644 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2645 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2646 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2647 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2648 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2649 tree scaletype = TREE_VALUE (arglist);
2650 tree real_masktype = masktype;
2651 gcc_checking_assert (types_compatible_p (srctype, rettype)
2652 && (!mask
2653 || TREE_CODE (masktype) == INTEGER_TYPE
2654 || types_compatible_p (srctype, masktype)));
2655 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2656 masktype = build_same_sized_truth_vector_type (srctype);
2657
2658 tree mask_halftype = masktype;
2659 tree perm_mask = NULL_TREE;
2660 tree mask_perm_mask = NULL_TREE;
2661 if (known_eq (nunits, gather_off_nunits))
2662 modifier = NONE;
2663 else if (known_eq (nunits * 2, gather_off_nunits))
2664 {
2665 modifier = WIDEN;
2666
2667 /* Currently widening gathers and scatters are only supported for
2668 fixed-length vectors. */
2669 int count = gather_off_nunits.to_constant ();
2670 vec_perm_builder sel (count, count, 1);
2671 for (int i = 0; i < count; ++i)
2672 sel.quick_push (i | (count / 2));
2673
2674 vec_perm_indices indices (sel, 1, count);
2675 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2676 indices);
2677 }
2678 else if (known_eq (nunits, gather_off_nunits * 2))
2679 {
2680 modifier = NARROW;
2681
2682 /* Currently narrowing gathers and scatters are only supported for
2683 fixed-length vectors. */
2684 int count = nunits.to_constant ();
2685 vec_perm_builder sel (count, count, 1);
2686 sel.quick_grow (count);
2687 for (int i = 0; i < count; ++i)
2688 sel[i] = i < count / 2 ? i : i + count / 2;
2689 vec_perm_indices indices (sel, 2, count);
2690 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2691
2692 ncopies *= 2;
2693
2694 if (mask && masktype == real_masktype)
2695 {
2696 for (int i = 0; i < count; ++i)
2697 sel[i] = i | (count / 2);
2698 indices.new_vector (sel, 2, count);
2699 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2700 }
2701 else if (mask)
2702 mask_halftype
2703 = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2704 }
2705 else
2706 gcc_unreachable ();
2707
2708 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2709 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2710
2711 tree ptr = fold_convert (ptrtype, gs_info->base);
2712 if (!is_gimple_min_invariant (ptr))
2713 {
2714 gimple_seq seq;
2715 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2716 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2717 gcc_assert (!new_bb);
2718 }
2719
2720 tree scale = build_int_cst (scaletype, gs_info->scale);
2721
2722 tree vec_oprnd0 = NULL_TREE;
2723 tree vec_mask = NULL_TREE;
2724 tree src_op = NULL_TREE;
2725 tree mask_op = NULL_TREE;
2726 tree prev_res = NULL_TREE;
2727 stmt_vec_info prev_stmt_info = NULL;
2728
2729 if (!mask)
2730 {
2731 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2732 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2733 }
2734
2735 for (int j = 0; j < ncopies; ++j)
2736 {
2737 tree op, var;
2738 if (modifier == WIDEN && (j & 1))
2739 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2740 perm_mask, stmt_info, gsi);
2741 else if (j == 0)
2742 op = vec_oprnd0
2743 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2744 else
2745 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2746 vec_oprnd0);
2747
2748 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2749 {
2750 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2751 TYPE_VECTOR_SUBPARTS (idxtype)));
2752 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2753 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2754 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2755 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2756 op = var;
2757 }
2758
2759 if (mask)
2760 {
2761 if (mask_perm_mask && (j & 1))
2762 mask_op = permute_vec_elements (mask_op, mask_op,
2763 mask_perm_mask, stmt_info, gsi);
2764 else
2765 {
2766 if (j == 0)
2767 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2768 else if (modifier != NARROW || (j & 1) == 0)
2769 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2770 vec_mask);
2771
2772 mask_op = vec_mask;
2773 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2774 {
2775 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2776 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2777 gcc_assert (known_eq (sub1, sub2));
2778 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2779 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2780 gassign *new_stmt
2781 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2782 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2783 mask_op = var;
2784 }
2785 }
2786 if (modifier == NARROW && masktype != real_masktype)
2787 {
2788 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2789 gassign *new_stmt
2790 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2791 : VEC_UNPACK_LO_EXPR,
2792 mask_op);
2793 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2794 mask_op = var;
2795 }
2796 src_op = mask_op;
2797 }
2798
2799 tree mask_arg = mask_op;
2800 if (masktype != real_masktype)
2801 {
2802 tree utype, optype = TREE_TYPE (mask_op);
2803 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2804 utype = real_masktype;
2805 else
2806 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2807 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2808 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2809 gassign *new_stmt
2810 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2811 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2812 mask_arg = var;
2813 if (!useless_type_conversion_p (real_masktype, utype))
2814 {
2815 gcc_assert (TYPE_PRECISION (utype)
2816 <= TYPE_PRECISION (real_masktype));
2817 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2818 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2819 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2820 mask_arg = var;
2821 }
2822 src_op = build_zero_cst (srctype);
2823 }
2824 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2825 mask_arg, scale);
2826
2827 stmt_vec_info new_stmt_info;
2828 if (!useless_type_conversion_p (vectype, rettype))
2829 {
2830 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2831 TYPE_VECTOR_SUBPARTS (rettype)));
2832 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2833 gimple_call_set_lhs (new_call, op);
2834 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2835 var = make_ssa_name (vec_dest);
2836 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2837 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2838 new_stmt_info
2839 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2840 }
2841 else
2842 {
2843 var = make_ssa_name (vec_dest, new_call);
2844 gimple_call_set_lhs (new_call, var);
2845 new_stmt_info
2846 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2847 }
2848
2849 if (modifier == NARROW)
2850 {
2851 if ((j & 1) == 0)
2852 {
2853 prev_res = var;
2854 continue;
2855 }
2856 var = permute_vec_elements (prev_res, var, perm_mask,
2857 stmt_info, gsi);
2858 new_stmt_info = loop_vinfo->lookup_def (var);
2859 }
2860
2861 if (prev_stmt_info == NULL)
2862 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2863 else
2864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2865 prev_stmt_info = new_stmt_info;
2866 }
2867 }
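/* A hedged example of the WIDEN case above (the modes are illustrative and
   depend on the target's gather builtins): for a gather of V4DF data whose
   builtin takes a V8SI offset vector, NUNITS == 4 and
   GATHER_OFF_NUNITS == 8, so MODIFIER == WIDEN and PERM_MASK is built from
   the selector { 4, 5, 6, 7, 4, 5, 6, 7 }; even-numbered copies use the
   offset vector unchanged while odd-numbered copies first permute its high
   half into place.  */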
2868
2869 /* Prepare the base and offset in GS_INFO for vectorization.
2870 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2871 to the vectorized offset argument for the first copy of STMT_INFO.
2872 STMT_INFO is the statement described by GS_INFO and LOOP is the
2873 containing loop. */
2874
2875 static void
2876 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2877 gather_scatter_info *gs_info,
2878 tree *dataref_ptr, tree *vec_offset)
2879 {
2880 gimple_seq stmts = NULL;
2881 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2882 if (stmts != NULL)
2883 {
2884 basic_block new_bb;
2885 edge pe = loop_preheader_edge (loop);
2886 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2887 gcc_assert (!new_bb);
2888 }
2889 tree offset_type = TREE_TYPE (gs_info->offset);
2890 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2891 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2892 offset_vectype);
2893 }
2894
2895 /* Prepare to implement a grouped or strided load or store using
2896 the gather load or scatter store operation described by GS_INFO.
2897 STMT_INFO is the load or store statement.
2898
2899 Set *DATAREF_BUMP to the amount that should be added to the base
2900 address after each copy of the vectorized statement. Set *VEC_OFFSET
2901 to an invariant offset vector in which element I has the value
2902 I * DR_STEP / SCALE. */
2903
2904 static void
2905 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2906 loop_vec_info loop_vinfo,
2907 gather_scatter_info *gs_info,
2908 tree *dataref_bump, tree *vec_offset)
2909 {
2910 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2911 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2912 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2913 gimple_seq stmts;
2914
2915 tree bump = size_binop (MULT_EXPR,
2916 fold_convert (sizetype, DR_STEP (dr)),
2917 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2918 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2919 if (stmts)
2920 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2921
2922 /* The offset given in GS_INFO can have pointer type, so use the element
2923 type of the vector instead. */
2924 tree offset_type = TREE_TYPE (gs_info->offset);
2925 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2926 offset_type = TREE_TYPE (offset_vectype);
2927
2928 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2929 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2930 ssize_int (gs_info->scale));
2931 step = fold_convert (offset_type, step);
2932 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2933
2934 /* Create {0, X, X*2, X*3, ...}. */
2935 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2936 build_zero_cst (offset_type), step);
2937 if (stmts)
2938 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2939 }
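/* A worked example of the values computed above (numbers illustrative):
   with DR_STEP == 8 bytes, GS_INFO->scale == 4 and a 4-element vector,
   X == 8 / 4 == 2, so *VEC_OFFSET becomes the invariant vector
   { 0, 2, 4, 6 } and *DATAREF_BUMP is 8 * 4 == 32 bytes, the amount by
   which the base address advances between consecutive copies of the
   vectorized access.  */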
2940
2941 /* Return the amount that should be added to a vector pointer to move
2942 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2943 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2944 vectorization. */
2945
2946 static tree
2947 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
2948 vect_memory_access_type memory_access_type)
2949 {
2950 if (memory_access_type == VMAT_INVARIANT)
2951 return size_zero_node;
2952
2953 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2954 tree step = vect_dr_behavior (dr_info)->step;
2955 if (tree_int_cst_sgn (step) == -1)
2956 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2957 return iv_step;
2958 }
2959
2960 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2961
2962 static bool
2963 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2964 stmt_vec_info *vec_stmt, slp_tree slp_node,
2965 tree vectype_in, stmt_vector_for_cost *cost_vec)
2966 {
2967 tree op, vectype;
2968 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2969 vec_info *vinfo = stmt_info->vinfo;
2970 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2971 unsigned ncopies;
2972
2973 op = gimple_call_arg (stmt, 0);
2974 vectype = STMT_VINFO_VECTYPE (stmt_info);
2975 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2976
2977 /* Multiple types in SLP are handled by creating the appropriate number of
2978 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2979 case of SLP. */
2980 if (slp_node)
2981 ncopies = 1;
2982 else
2983 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2984
2985 gcc_assert (ncopies >= 1);
2986
2987 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2988 if (! char_vectype)
2989 return false;
2990
2991 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2992 unsigned word_bytes;
2993 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2994 return false;
2995
2996 /* The encoding uses one stepped pattern for each byte in the word. */
2997 vec_perm_builder elts (num_bytes, word_bytes, 3);
2998 for (unsigned i = 0; i < 3; ++i)
2999 for (unsigned j = 0; j < word_bytes; ++j)
3000 elts.quick_push ((i + 1) * word_bytes - j - 1);
3001
3002 vec_perm_indices indices (elts, 1, num_bytes);
3003 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3004 return false;
3005
3006 if (! vec_stmt)
3007 {
3008 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3009 DUMP_VECT_SCOPE ("vectorizable_bswap");
3010 if (! slp_node)
3011 {
3012 record_stmt_cost (cost_vec,
3013 1, vector_stmt, stmt_info, 0, vect_prologue);
3014 record_stmt_cost (cost_vec,
3015 ncopies, vec_perm, stmt_info, 0, vect_body);
3016 }
3017 return true;
3018 }
3019
3020 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3021
3022 /* Transform. */
3023 vec<tree> vec_oprnds = vNULL;
3024 stmt_vec_info new_stmt_info = NULL;
3025 stmt_vec_info prev_stmt_info = NULL;
3026 for (unsigned j = 0; j < ncopies; j++)
3027 {
3028 /* Handle uses. */
3029 if (j == 0)
3030 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3031 else
3032 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3033
3034 /* Arguments are ready. Create the new vector stmt. */
3035 unsigned i;
3036 tree vop;
3037 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3038 {
3039 gimple *new_stmt;
3040 tree tem = make_ssa_name (char_vectype);
3041 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3042 char_vectype, vop));
3043 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3044 tree tem2 = make_ssa_name (char_vectype);
3045 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3046 tem, tem, bswap_vconst);
3047 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3048 tem = make_ssa_name (vectype);
3049 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3050 vectype, tem2));
3051 new_stmt_info
3052 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3053 if (slp_node)
3054 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3055 }
3056
3057 if (slp_node)
3058 continue;
3059
3060 if (j == 0)
3061 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3062 else
3063 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3064
3065 prev_stmt_info = new_stmt_info;
3066 }
3067
3068 vec_oprnds.release ();
3069 return true;
3070 }
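/* A worked example of the byte selector used above: for __builtin_bswap32
   applied to a vector of four 32-bit elements viewed as sixteen chars,
   WORD_BYTES == 4 and the permutation is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. each
   4-byte word is reversed in place by the single VEC_PERM_EXPR on the char
   vector.  */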
3071
3072 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3073 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3074 in a single step. On success, store the binary pack code in
3075 *CONVERT_CODE. */
3076
3077 static bool
3078 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3079 tree_code *convert_code)
3080 {
3081 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3082 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3083 return false;
3084
3085 tree_code code;
3086 int multi_step_cvt = 0;
3087 auto_vec <tree, 8> interm_types;
3088 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3089 &code, &multi_step_cvt,
3090 &interm_types)
3091 || multi_step_cvt)
3092 return false;
3093
3094 *convert_code = code;
3095 return true;
3096 }
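/* A hedged example (which narrowings are single-step depends on the
   target's vector modes): narrowing from 32-bit to 16-bit integer elements
   is typically a single VEC_PACK_TRUNC_EXPR, so *CONVERT_CODE is set to
   that code and true is returned; narrowing from 32-bit to 8-bit elements
   needs an intermediate type, MULTI_STEP_CVT becomes nonzero and the
   function returns false.  */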
3097
3098 /* Function vectorizable_call.
3099
3100 Check if STMT_INFO performs a function call that can be vectorized.
3101 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3102 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3103 Return true if STMT_INFO is vectorizable in this way. */
3104
3105 static bool
3106 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3107 stmt_vec_info *vec_stmt, slp_tree slp_node,
3108 stmt_vector_for_cost *cost_vec)
3109 {
3110 gcall *stmt;
3111 tree vec_dest;
3112 tree scalar_dest;
3113 tree op;
3114 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3115 stmt_vec_info prev_stmt_info;
3116 tree vectype_out, vectype_in;
3117 poly_uint64 nunits_in;
3118 poly_uint64 nunits_out;
3119 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3120 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3121 vec_info *vinfo = stmt_info->vinfo;
3122 tree fndecl, new_temp, rhs_type;
3123 enum vect_def_type dt[4]
3124 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3125 vect_unknown_def_type };
3126 int ndts = ARRAY_SIZE (dt);
3127 int ncopies, j;
3128 auto_vec<tree, 8> vargs;
3129 auto_vec<tree, 8> orig_vargs;
3130 enum { NARROW, NONE, WIDEN } modifier;
3131 size_t i, nargs;
3132 tree lhs;
3133
3134 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3135 return false;
3136
3137 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3138 && ! vec_stmt)
3139 return false;
3140
3141 /* Is STMT_INFO a vectorizable call? */
3142 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3143 if (!stmt)
3144 return false;
3145
3146 if (gimple_call_internal_p (stmt)
3147 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3148 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3149 /* Handled by vectorizable_load and vectorizable_store. */
3150 return false;
3151
3152 if (gimple_call_lhs (stmt) == NULL_TREE
3153 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3154 return false;
3155
3156 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3157
3158 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3159
3160 /* Process function arguments. */
3161 rhs_type = NULL_TREE;
3162 vectype_in = NULL_TREE;
3163 nargs = gimple_call_num_args (stmt);
3164
3165 /* Bail out if the function has more than four arguments; we do not have
3166 interesting builtin functions to vectorize with more than two arguments
3167 except for fma. No arguments is also not good. */
3168 if (nargs == 0 || nargs > 4)
3169 return false;
3170
3171 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3172 combined_fn cfn = gimple_call_combined_fn (stmt);
3173 if (cfn == CFN_GOMP_SIMD_LANE)
3174 {
3175 nargs = 0;
3176 rhs_type = unsigned_type_node;
3177 }
3178
3179 int mask_opno = -1;
3180 if (internal_fn_p (cfn))
3181 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3182
3183 for (i = 0; i < nargs; i++)
3184 {
3185 tree opvectype;
3186
3187 op = gimple_call_arg (stmt, i);
3188 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3189 {
3190 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3192 "use not simple.\n");
3193 return false;
3194 }
3195
3196 /* Skip the mask argument to an internal function. This operand
3197 has been converted via a pattern if necessary. */
3198 if ((int) i == mask_opno)
3199 continue;
3200
3201 /* We can only handle calls with arguments of the same type. */
3202 if (rhs_type
3203 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3204 {
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3207 "argument types differ.\n");
3208 return false;
3209 }
3210 if (!rhs_type)
3211 rhs_type = TREE_TYPE (op);
3212
3213 if (!vectype_in)
3214 vectype_in = opvectype;
3215 else if (opvectype
3216 && opvectype != vectype_in)
3217 {
3218 if (dump_enabled_p ())
3219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3220 "argument vector types differ.\n");
3221 return false;
3222 }
3223 }
3224 /* If all arguments are external or constant defs use a vector type with
3225 the same size as the output vector type. */
3226 if (!vectype_in)
3227 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3228 if (vec_stmt)
3229 gcc_assert (vectype_in);
3230 if (!vectype_in)
3231 {
3232 if (dump_enabled_p ())
3233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3234 "no vectype for scalar type %T\n", rhs_type);
3235
3236 return false;
3237 }
3238
3239 /* FORNOW */
3240 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3241 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3242 if (known_eq (nunits_in * 2, nunits_out))
3243 modifier = NARROW;
3244 else if (known_eq (nunits_out, nunits_in))
3245 modifier = NONE;
3246 else if (known_eq (nunits_out * 2, nunits_in))
3247 modifier = WIDEN;
3248 else
3249 return false;
3250
3251 /* We only handle functions that do not read or clobber memory. */
3252 if (gimple_vuse (stmt))
3253 {
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3256 "function reads from or writes to memory.\n");
3257 return false;
3258 }
3259
3260 /* For now, we only vectorize functions if a target specific builtin
3261 is available. TODO -- in some cases, it might be profitable to
3262 insert the calls for pieces of the vector, in order to be able
3263 to vectorize other operations in the loop. */
3264 fndecl = NULL_TREE;
3265 internal_fn ifn = IFN_LAST;
3266 tree callee = gimple_call_fndecl (stmt);
3267
3268 /* First try using an internal function. */
3269 tree_code convert_code = ERROR_MARK;
3270 if (cfn != CFN_LAST
3271 && (modifier == NONE
3272 || (modifier == NARROW
3273 && simple_integer_narrowing (vectype_out, vectype_in,
3274 &convert_code))))
3275 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3276 vectype_in);
3277
3278 /* If that fails, try asking for a target-specific built-in function. */
3279 if (ifn == IFN_LAST)
3280 {
3281 if (cfn != CFN_LAST)
3282 fndecl = targetm.vectorize.builtin_vectorized_function
3283 (cfn, vectype_out, vectype_in);
3284 else if (callee)
3285 fndecl = targetm.vectorize.builtin_md_vectorized_function
3286 (callee, vectype_out, vectype_in);
3287 }
3288
3289 if (ifn == IFN_LAST && !fndecl)
3290 {
3291 if (cfn == CFN_GOMP_SIMD_LANE
3292 && !slp_node
3293 && loop_vinfo
3294 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3295 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3296 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3297 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3298 {
3299 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3300 { 0, 1, 2, ... vf - 1 } vector. */
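/* E.g. (illustrative): with a 4-lane vector type and VF == 8 the transform
code below emits { 0, 1, 2, 3 } for the first copy and { 4, 5, 6, 7 }
for the second; see the build_index_vector call further down. */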
3301 gcc_assert (nargs == 0);
3302 }
3303 else if (modifier == NONE
3304 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3305 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3306 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3307 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3308 vectype_in, cost_vec);
3309 else
3310 {
3311 if (dump_enabled_p ())
3312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3313 "function is not vectorizable.\n");
3314 return false;
3315 }
3316 }
3317
3318 if (slp_node)
3319 ncopies = 1;
3320 else if (modifier == NARROW && ifn == IFN_LAST)
3321 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3322 else
3323 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3324
3325 /* Sanity check: make sure that at least one copy of the vectorized stmt
3326 needs to be generated. */
3327 gcc_assert (ncopies >= 1);
3328
3329 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3330 if (!vec_stmt) /* transformation not required. */
3331 {
3332 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3333 DUMP_VECT_SCOPE ("vectorizable_call");
3334 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3335 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3336 record_stmt_cost (cost_vec, ncopies / 2,
3337 vec_promote_demote, stmt_info, 0, vect_body);
3338
3339 if (loop_vinfo && mask_opno >= 0)
3340 {
3341 unsigned int nvectors = (slp_node
3342 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3343 : ncopies);
3344 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3345 }
3346 return true;
3347 }
3348
3349 /* Transform. */
3350
3351 if (dump_enabled_p ())
3352 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3353
3354 /* Handle def. */
3355 scalar_dest = gimple_call_lhs (stmt);
3356 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3357
3358 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3359
3360 stmt_vec_info new_stmt_info = NULL;
3361 prev_stmt_info = NULL;
3362 if (modifier == NONE || ifn != IFN_LAST)
3363 {
3364 tree prev_res = NULL_TREE;
3365 vargs.safe_grow (nargs);
3366 orig_vargs.safe_grow (nargs);
3367 for (j = 0; j < ncopies; ++j)
3368 {
3369 /* Build argument list for the vectorized call. */
3370 if (slp_node)
3371 {
3372 auto_vec<vec<tree> > vec_defs (nargs);
3373 vec<tree> vec_oprnds0;
3374
3375 for (i = 0; i < nargs; i++)
3376 vargs[i] = gimple_call_arg (stmt, i);
3377 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3378 vec_oprnds0 = vec_defs[0];
3379
3380 /* Arguments are ready. Create the new vector stmt. */
3381 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3382 {
3383 size_t k;
3384 for (k = 0; k < nargs; k++)
3385 {
3386 vec<tree> vec_oprndsk = vec_defs[k];
3387 vargs[k] = vec_oprndsk[i];
3388 }
3389 if (modifier == NARROW)
3390 {
3391 /* We don't define any narrowing conditional functions
3392 at present. */
3393 gcc_assert (mask_opno < 0);
3394 tree half_res = make_ssa_name (vectype_in);
3395 gcall *call
3396 = gimple_build_call_internal_vec (ifn, vargs);
3397 gimple_call_set_lhs (call, half_res);
3398 gimple_call_set_nothrow (call, true);
3399 new_stmt_info
3400 = vect_finish_stmt_generation (stmt_info, call, gsi);
3401 if ((i & 1) == 0)
3402 {
3403 prev_res = half_res;
3404 continue;
3405 }
3406 new_temp = make_ssa_name (vec_dest);
3407 gimple *new_stmt
3408 = gimple_build_assign (new_temp, convert_code,
3409 prev_res, half_res);
3410 new_stmt_info
3411 = vect_finish_stmt_generation (stmt_info, new_stmt,
3412 gsi);
3413 }
3414 else
3415 {
3416 if (mask_opno >= 0 && masked_loop_p)
3417 {
3418 unsigned int vec_num = vec_oprnds0.length ();
3419 /* Always true for SLP. */
3420 gcc_assert (ncopies == 1);
3421 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3422 vectype_out, i);
3423 vargs[mask_opno] = prepare_load_store_mask
3424 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3425 }
3426
3427 gcall *call;
3428 if (ifn != IFN_LAST)
3429 call = gimple_build_call_internal_vec (ifn, vargs);
3430 else
3431 call = gimple_build_call_vec (fndecl, vargs);
3432 new_temp = make_ssa_name (vec_dest, call);
3433 gimple_call_set_lhs (call, new_temp);
3434 gimple_call_set_nothrow (call, true);
3435 new_stmt_info
3436 = vect_finish_stmt_generation (stmt_info, call, gsi);
3437 }
3438 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3439 }
3440
3441 for (i = 0; i < nargs; i++)
3442 {
3443 vec<tree> vec_oprndsi = vec_defs[i];
3444 vec_oprndsi.release ();
3445 }
3446 continue;
3447 }
3448
3449 for (i = 0; i < nargs; i++)
3450 {
3451 op = gimple_call_arg (stmt, i);
3452 if (j == 0)
3453 vec_oprnd0
3454 = vect_get_vec_def_for_operand (op, stmt_info);
3455 else
3456 vec_oprnd0
3457 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3458
3459 orig_vargs[i] = vargs[i] = vec_oprnd0;
3460 }
3461
3462 if (mask_opno >= 0 && masked_loop_p)
3463 {
3464 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3465 vectype_out, j);
3466 vargs[mask_opno]
3467 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3468 vargs[mask_opno], gsi);
3469 }
3470
3471 if (cfn == CFN_GOMP_SIMD_LANE)
3472 {
3473 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3474 tree new_var
3475 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3476 gimple *init_stmt = gimple_build_assign (new_var, cst);
3477 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3478 new_temp = make_ssa_name (vec_dest);
3479 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3480 new_stmt_info
3481 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3482 }
3483 else if (modifier == NARROW)
3484 {
3485 /* We don't define any narrowing conditional functions at
3486 present. */
3487 gcc_assert (mask_opno < 0);
3488 tree half_res = make_ssa_name (vectype_in);
3489 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3490 gimple_call_set_lhs (call, half_res);
3491 gimple_call_set_nothrow (call, true);
3492 new_stmt_info
3493 = vect_finish_stmt_generation (stmt_info, call, gsi);
3494 if ((j & 1) == 0)
3495 {
3496 prev_res = half_res;
3497 continue;
3498 }
3499 new_temp = make_ssa_name (vec_dest);
3500 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3501 prev_res, half_res);
3502 new_stmt_info
3503 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3504 }
3505 else
3506 {
3507 gcall *call;
3508 if (ifn != IFN_LAST)
3509 call = gimple_build_call_internal_vec (ifn, vargs);
3510 else
3511 call = gimple_build_call_vec (fndecl, vargs);
3512 new_temp = make_ssa_name (vec_dest, call);
3513 gimple_call_set_lhs (call, new_temp);
3514 gimple_call_set_nothrow (call, true);
3515 new_stmt_info
3516 = vect_finish_stmt_generation (stmt_info, call, gsi);
3517 }
3518
3519 if (j == (modifier == NARROW ? 1 : 0))
3520 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3521 else
3522 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3523
3524 prev_stmt_info = new_stmt_info;
3525 }
3526 }
3527 else if (modifier == NARROW)
3528 {
3529 /* We don't define any narrowing conditional functions at present. */
3530 gcc_assert (mask_opno < 0);
3531 for (j = 0; j < ncopies; ++j)
3532 {
3533 /* Build argument list for the vectorized call. */
3534 if (j == 0)
3535 vargs.create (nargs * 2);
3536 else
3537 vargs.truncate (0);
3538
3539 if (slp_node)
3540 {
3541 auto_vec<vec<tree> > vec_defs (nargs);
3542 vec<tree> vec_oprnds0;
3543
3544 for (i = 0; i < nargs; i++)
3545 vargs.quick_push (gimple_call_arg (stmt, i));
3546 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3547 vec_oprnds0 = vec_defs[0];
3548
3549 /* Arguments are ready. Create the new vector stmt. */
3550 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3551 {
3552 size_t k;
3553 vargs.truncate (0);
3554 for (k = 0; k < nargs; k++)
3555 {
3556 vec<tree> vec_oprndsk = vec_defs[k];
3557 vargs.quick_push (vec_oprndsk[i]);
3558 vargs.quick_push (vec_oprndsk[i + 1]);
3559 }
3560 gcall *call;
3561 if (ifn != IFN_LAST)
3562 call = gimple_build_call_internal_vec (ifn, vargs);
3563 else
3564 call = gimple_build_call_vec (fndecl, vargs);
3565 new_temp = make_ssa_name (vec_dest, call);
3566 gimple_call_set_lhs (call, new_temp);
3567 gimple_call_set_nothrow (call, true);
3568 new_stmt_info
3569 = vect_finish_stmt_generation (stmt_info, call, gsi);
3570 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3571 }
3572
3573 for (i = 0; i < nargs; i++)
3574 {
3575 vec<tree> vec_oprndsi = vec_defs[i];
3576 vec_oprndsi.release ();
3577 }
3578 continue;
3579 }
3580
3581 for (i = 0; i < nargs; i++)
3582 {
3583 op = gimple_call_arg (stmt, i);
3584 if (j == 0)
3585 {
3586 vec_oprnd0
3587 = vect_get_vec_def_for_operand (op, stmt_info);
3588 vec_oprnd1
3589 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3590 }
3591 else
3592 {
3593 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3594 2 * i + 1);
3595 vec_oprnd0
3596 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3597 vec_oprnd1
3598 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3599 }
3600
3601 vargs.quick_push (vec_oprnd0);
3602 vargs.quick_push (vec_oprnd1);
3603 }
3604
3605 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3606 new_temp = make_ssa_name (vec_dest, new_stmt);
3607 gimple_call_set_lhs (new_stmt, new_temp);
3608 new_stmt_info
3609 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3610
3611 if (j == 0)
3612 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3613 else
3614 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3615
3616 prev_stmt_info = new_stmt_info;
3617 }
3618
3619 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3620 }
3621 else
3622 /* No current target implements this case. */
3623 return false;
3624
3625 vargs.release ();
3626
3627 /* The call in STMT might prevent it from being removed in DCE.
3628 We cannot, however, remove it here, due to the way the SSA name
3629 it defines is mapped to the new definition. So just replace the
3630 rhs of the statement with something harmless. */
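/* Illustrative example (SSA names invented): if the scalar statement was
x_5 = __builtin_sqrtf (a_3), the vectorized call has already been emitted
above, and the scalar statement is merely rewritten here as x_5 = 0.0 so
that a later DCE pass can clean it up. */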
3631
3632 if (slp_node)
3633 return true;
3634
3635 stmt_info = vect_orig_stmt (stmt_info);
3636 lhs = gimple_get_lhs (stmt_info->stmt);
3637
3638 gassign *new_stmt
3639 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3640 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3641
3642 return true;
3643 }
3644
3645
3646 struct simd_call_arg_info
3647 {
3648 tree vectype;
3649 tree op;
3650 HOST_WIDE_INT linear_step;
3651 enum vect_def_type dt;
3652 unsigned int align;
3653 bool simd_lane_linear;
3654 };
3655
3656 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3657 is linear within simd lane (but not within whole loop), note it in
3658 *ARGINFO. */
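/* An illustrative sketch of the pattern this walk recognizes (SSA names
are made up):

_5 = .GOMP_SIMD_LANE (simduid_1);
_6 = (sizetype) _5;
_7 = _6 * 8;
op_8 = &arr + _7;

Here OP is linear within a simd lane, with base &arr and linear step 8. */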
3659
3660 static void
3661 vect_simd_lane_linear (tree op, struct loop *loop,
3662 struct simd_call_arg_info *arginfo)
3663 {
3664 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3665
3666 if (!is_gimple_assign (def_stmt)
3667 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3668 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3669 return;
3670
3671 tree base = gimple_assign_rhs1 (def_stmt);
3672 HOST_WIDE_INT linear_step = 0;
3673 tree v = gimple_assign_rhs2 (def_stmt);
3674 while (TREE_CODE (v) == SSA_NAME)
3675 {
3676 tree t;
3677 def_stmt = SSA_NAME_DEF_STMT (v);
3678 if (is_gimple_assign (def_stmt))
3679 switch (gimple_assign_rhs_code (def_stmt))
3680 {
3681 case PLUS_EXPR:
3682 t = gimple_assign_rhs2 (def_stmt);
3683 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3684 return;
3685 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3686 v = gimple_assign_rhs1 (def_stmt);
3687 continue;
3688 case MULT_EXPR:
3689 t = gimple_assign_rhs2 (def_stmt);
3690 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3691 return;
3692 linear_step = tree_to_shwi (t);
3693 v = gimple_assign_rhs1 (def_stmt);
3694 continue;
3695 CASE_CONVERT:
3696 t = gimple_assign_rhs1 (def_stmt);
3697 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3698 || (TYPE_PRECISION (TREE_TYPE (v))
3699 < TYPE_PRECISION (TREE_TYPE (t))))
3700 return;
3701 if (!linear_step)
3702 linear_step = 1;
3703 v = t;
3704 continue;
3705 default:
3706 return;
3707 }
3708 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3709 && loop->simduid
3710 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3711 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3712 == loop->simduid))
3713 {
3714 if (!linear_step)
3715 linear_step = 1;
3716 arginfo->linear_step = linear_step;
3717 arginfo->op = base;
3718 arginfo->simd_lane_linear = true;
3719 return;
3720 }
3721 }
3722 }
3723
3724 /* Return the number of elements in vector type VECTYPE, which is associated
3725 with a SIMD clone. At present these vectors always have a constant
3726 length. */
3727
3728 static unsigned HOST_WIDE_INT
3729 simd_clone_subparts (tree vectype)
3730 {
3731 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3732 }
3733
3734 /* Function vectorizable_simd_clone_call.
3735
3736 Check if STMT_INFO performs a function call that can be vectorized
3737 by calling a simd clone of the function.
3738 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3739 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3740 Return true if STMT_INFO is vectorizable in this way. */
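/* As a rough illustrative sketch (names invented): inside a loop
vectorized with VF == 4, a scalar call

x_1 = foo (a_2, b_3);

where foo has a usable simd clone of simdlen 4 is replaced by a single
call to that clone operating on whole vectors, conceptually

vect_x = foo.simdclone (vect_a, vect_b);

with the clone's return value distributed back according to its return
type. */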
3741
3742 static bool
3743 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3744 gimple_stmt_iterator *gsi,
3745 stmt_vec_info *vec_stmt, slp_tree slp_node,
3746 stmt_vector_for_cost *)
3747 {
3748 tree vec_dest;
3749 tree scalar_dest;
3750 tree op, type;
3751 tree vec_oprnd0 = NULL_TREE;
3752 stmt_vec_info prev_stmt_info;
3753 tree vectype;
3754 unsigned int nunits;
3755 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3756 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3757 vec_info *vinfo = stmt_info->vinfo;
3758 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3759 tree fndecl, new_temp;
3760 int ncopies, j;
3761 auto_vec<simd_call_arg_info> arginfo;
3762 vec<tree> vargs = vNULL;
3763 size_t i, nargs;
3764 tree lhs, rtype, ratype;
3765 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3766
3767 /* Is STMT a vectorizable call? */
3768 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3769 if (!stmt)
3770 return false;
3771
3772 fndecl = gimple_call_fndecl (stmt);
3773 if (fndecl == NULL_TREE)
3774 return false;
3775
3776 struct cgraph_node *node = cgraph_node::get (fndecl);
3777 if (node == NULL || node->simd_clones == NULL)
3778 return false;
3779
3780 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3781 return false;
3782
3783 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3784 && ! vec_stmt)
3785 return false;
3786
3787 if (gimple_call_lhs (stmt)
3788 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3789 return false;
3790
3791 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3792
3793 vectype = STMT_VINFO_VECTYPE (stmt_info);
3794
3795 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3796 return false;
3797
3798 /* FORNOW */
3799 if (slp_node)
3800 return false;
3801
3802 /* Process function arguments. */
3803 nargs = gimple_call_num_args (stmt);
3804
3805 /* Bail out if the function has zero arguments. */
3806 if (nargs == 0)
3807 return false;
3808
3809 arginfo.reserve (nargs, true);
3810
3811 for (i = 0; i < nargs; i++)
3812 {
3813 simd_call_arg_info thisarginfo;
3814 affine_iv iv;
3815
3816 thisarginfo.linear_step = 0;
3817 thisarginfo.align = 0;
3818 thisarginfo.op = NULL_TREE;
3819 thisarginfo.simd_lane_linear = false;
3820
3821 op = gimple_call_arg (stmt, i);
3822 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3823 &thisarginfo.vectype)
3824 || thisarginfo.dt == vect_uninitialized_def)
3825 {
3826 if (dump_enabled_p ())
3827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3828 "use not simple.\n");
3829 return false;
3830 }
3831
3832 if (thisarginfo.dt == vect_constant_def
3833 || thisarginfo.dt == vect_external_def)
3834 gcc_assert (thisarginfo.vectype == NULL_TREE);
3835 else
3836 gcc_assert (thisarginfo.vectype != NULL_TREE);
3837
3838 /* For linear arguments, the analyze phase should have saved
3839 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
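/* The layout of that vector is: entry 0 holds the selected clone's decl,
and for argument I the entries at I*3 + 1, I*3 + 2 and I*3 + 3 hold the
base operand, the linear step and a flag saying whether the argument is
linear within a simd lane; see the !vec_stmt branch below where these
entries are pushed. */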
3840 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3841 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3842 {
3843 gcc_assert (vec_stmt);
3844 thisarginfo.linear_step
3845 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3846 thisarginfo.op
3847 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3848 thisarginfo.simd_lane_linear
3849 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3850 == boolean_true_node);
3851 /* If the loop has been peeled for alignment, we need to adjust the base accordingly. */
3852 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3853 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3854 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3855 {
3856 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3857 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3858 tree opt = TREE_TYPE (thisarginfo.op);
3859 bias = fold_convert (TREE_TYPE (step), bias);
3860 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3861 thisarginfo.op
3862 = fold_build2 (POINTER_TYPE_P (opt)
3863 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3864 thisarginfo.op, bias);
3865 }
3866 }
3867 else if (!vec_stmt
3868 && thisarginfo.dt != vect_constant_def
3869 && thisarginfo.dt != vect_external_def
3870 && loop_vinfo
3871 && TREE_CODE (op) == SSA_NAME
3872 && simple_iv (loop, loop_containing_stmt (stmt), op,
3873 &iv, false)
3874 && tree_fits_shwi_p (iv.step))
3875 {
3876 thisarginfo.linear_step = tree_to_shwi (iv.step);
3877 thisarginfo.op = iv.base;
3878 }
3879 else if ((thisarginfo.dt == vect_constant_def
3880 || thisarginfo.dt == vect_external_def)
3881 && POINTER_TYPE_P (TREE_TYPE (op)))
3882 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3883 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3884 linear too. */
3885 if (POINTER_TYPE_P (TREE_TYPE (op))
3886 && !thisarginfo.linear_step
3887 && !vec_stmt
3888 && thisarginfo.dt != vect_constant_def
3889 && thisarginfo.dt != vect_external_def
3890 && loop_vinfo
3891 && !slp_node
3892 && TREE_CODE (op) == SSA_NAME)
3893 vect_simd_lane_linear (op, loop, &thisarginfo);
3894
3895 arginfo.quick_push (thisarginfo);
3896 }
3897
3898 unsigned HOST_WIDE_INT vf;
3899 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3900 {
3901 if (dump_enabled_p ())
3902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3903 "not considering SIMD clones; not yet supported"
3904 " for variable-width vectors.\n");
3905 return false;
3906 }
3907
3908 unsigned int badness = 0;
3909 struct cgraph_node *bestn = NULL;
3910 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3911 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3912 else
3913 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3914 n = n->simdclone->next_clone)
3915 {
3916 unsigned int this_badness = 0;
3917 if (n->simdclone->simdlen > vf
3918 || n->simdclone->nargs != nargs)
3919 continue;
3920 if (n->simdclone->simdlen < vf)
3921 this_badness += (exact_log2 (vf)
3922 - exact_log2 (n->simdclone->simdlen)) * 1024;
3923 if (n->simdclone->inbranch)
3924 this_badness += 2048;
3925 int target_badness = targetm.simd_clone.usable (n);
3926 if (target_badness < 0)
3927 continue;
3928 this_badness += target_badness * 512;
3929 /* FORNOW: Code to pass the mask argument still needs to be added. */
3930 if (n->simdclone->inbranch)
3931 continue;
3932 for (i = 0; i < nargs; i++)
3933 {
3934 switch (n->simdclone->args[i].arg_type)
3935 {
3936 case SIMD_CLONE_ARG_TYPE_VECTOR:
3937 if (!useless_type_conversion_p
3938 (n->simdclone->args[i].orig_type,
3939 TREE_TYPE (gimple_call_arg (stmt, i))))
3940 i = -1;
3941 else if (arginfo[i].dt == vect_constant_def
3942 || arginfo[i].dt == vect_external_def
3943 || arginfo[i].linear_step)
3944 this_badness += 64;
3945 break;
3946 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3947 if (arginfo[i].dt != vect_constant_def
3948 && arginfo[i].dt != vect_external_def)
3949 i = -1;
3950 break;
3951 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3952 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3953 if (arginfo[i].dt == vect_constant_def
3954 || arginfo[i].dt == vect_external_def
3955 || (arginfo[i].linear_step
3956 != n->simdclone->args[i].linear_step))
3957 i = -1;
3958 break;
3959 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3960 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3961 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3962 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3963 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3964 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3965 /* FORNOW */
3966 i = -1;
3967 break;
3968 case SIMD_CLONE_ARG_TYPE_MASK:
3969 gcc_unreachable ();
3970 }
3971 if (i == (size_t) -1)
3972 break;
3973 if (n->simdclone->args[i].alignment > arginfo[i].align)
3974 {
3975 i = -1;
3976 break;
3977 }
3978 if (arginfo[i].align)
3979 this_badness += (exact_log2 (arginfo[i].align)
3980 - exact_log2 (n->simdclone->args[i].alignment));
3981 }
3982 if (i == (size_t) -1)
3983 continue;
3984 if (bestn == NULL || this_badness < badness)
3985 {
3986 bestn = n;
3987 badness = this_badness;
3988 }
3989 }
3990
3991 if (bestn == NULL)
3992 return false;
3993
3994 for (i = 0; i < nargs; i++)
3995 if ((arginfo[i].dt == vect_constant_def
3996 || arginfo[i].dt == vect_external_def)
3997 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3998 {
3999 arginfo[i].vectype
4000 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4001 i)));
4002 if (arginfo[i].vectype == NULL
4003 || (simd_clone_subparts (arginfo[i].vectype)
4004 > bestn->simdclone->simdlen))
4005 return false;
4006 }
4007
4008 fndecl = bestn->decl;
4009 nunits = bestn->simdclone->simdlen;
4010 ncopies = vf / nunits;
4011
4012 /* If the function isn't const, only allow it in simd loops where the user
4013 has asserted that at least nunits consecutive iterations can be
4014 performed using SIMD instructions. */
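/* For example (illustrative): with "#pragma omp simd safelen(8)" the user
asserts that up to 8 consecutive iterations may be executed concurrently,
so a non-const clone with simdlen 8 is still acceptable; without such a
guarantee a clone of a function that touches memory is rejected here. */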
4015 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4016 && gimple_vuse (stmt))
4017 return false;
4018
4019 /* Sanity check: make sure that at least one copy of the vectorized stmt
4020 needs to be generated. */
4021 gcc_assert (ncopies >= 1);
4022
4023 if (!vec_stmt) /* transformation not required. */
4024 {
4025 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4026 for (i = 0; i < nargs; i++)
4027 if ((bestn->simdclone->args[i].arg_type
4028 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4029 || (bestn->simdclone->args[i].arg_type
4030 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4031 {
4032 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4033 + 1);
4034 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4035 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4036 ? size_type_node : TREE_TYPE (arginfo[i].op);
4037 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4039 tree sll = arginfo[i].simd_lane_linear
4040 ? boolean_true_node : boolean_false_node;
4041 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4042 }
4043 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4044 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4045 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4046 return true;
4047 }
4048
4049 /* Transform. */
4050
4051 if (dump_enabled_p ())
4052 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4053
4054 /* Handle def. */
4055 scalar_dest = gimple_call_lhs (stmt);
4056 vec_dest = NULL_TREE;
4057 rtype = NULL_TREE;
4058 ratype = NULL_TREE;
4059 if (scalar_dest)
4060 {
4061 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4062 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4063 if (TREE_CODE (rtype) == ARRAY_TYPE)
4064 {
4065 ratype = rtype;
4066 rtype = TREE_TYPE (ratype);
4067 }
4068 }
4069
4070 prev_stmt_info = NULL;
4071 for (j = 0; j < ncopies; ++j)
4072 {
4073 /* Build argument list for the vectorized call. */
4074 if (j == 0)
4075 vargs.create (nargs);
4076 else
4077 vargs.truncate (0);
4078
4079 for (i = 0; i < nargs; i++)
4080 {
4081 unsigned int k, l, m, o;
4082 tree atype;
4083 op = gimple_call_arg (stmt, i);
4084 switch (bestn->simdclone->args[i].arg_type)
4085 {
4086 case SIMD_CLONE_ARG_TYPE_VECTOR:
4087 atype = bestn->simdclone->args[i].vector_type;
4088 o = nunits / simd_clone_subparts (atype);
4089 for (m = j * o; m < (j + 1) * o; m++)
4090 {
4091 if (simd_clone_subparts (atype)
4092 < simd_clone_subparts (arginfo[i].vectype))
4093 {
4094 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4095 k = (simd_clone_subparts (arginfo[i].vectype)
4096 / simd_clone_subparts (atype));
4097 gcc_assert ((k & (k - 1)) == 0);
4098 if (m == 0)
4099 vec_oprnd0
4100 = vect_get_vec_def_for_operand (op, stmt_info);
4101 else
4102 {
4103 vec_oprnd0 = arginfo[i].op;
4104 if ((m & (k - 1)) == 0)
4105 vec_oprnd0
4106 = vect_get_vec_def_for_stmt_copy (vinfo,
4107 vec_oprnd0);
4108 }
4109 arginfo[i].op = vec_oprnd0;
4110 vec_oprnd0
4111 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4112 bitsize_int (prec),
4113 bitsize_int ((m & (k - 1)) * prec));
4114 gassign *new_stmt
4115 = gimple_build_assign (make_ssa_name (atype),
4116 vec_oprnd0);
4117 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4118 vargs.safe_push (gimple_assign_lhs (new_stmt));
4119 }
4120 else
4121 {
4122 k = (simd_clone_subparts (atype)
4123 / simd_clone_subparts (arginfo[i].vectype));
4124 gcc_assert ((k & (k - 1)) == 0);
4125 vec<constructor_elt, va_gc> *ctor_elts;
4126 if (k != 1)
4127 vec_alloc (ctor_elts, k);
4128 else
4129 ctor_elts = NULL;
4130 for (l = 0; l < k; l++)
4131 {
4132 if (m == 0 && l == 0)
4133 vec_oprnd0
4134 = vect_get_vec_def_for_operand (op, stmt_info);
4135 else
4136 vec_oprnd0
4137 = vect_get_vec_def_for_stmt_copy (vinfo,
4138 arginfo[i].op);
4139 arginfo[i].op = vec_oprnd0;
4140 if (k == 1)
4141 break;
4142 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4143 vec_oprnd0);
4144 }
4145 if (k == 1)
4146 vargs.safe_push (vec_oprnd0);
4147 else
4148 {
4149 vec_oprnd0 = build_constructor (atype, ctor_elts);
4150 gassign *new_stmt
4151 = gimple_build_assign (make_ssa_name (atype),
4152 vec_oprnd0);
4153 vect_finish_stmt_generation (stmt_info, new_stmt,
4154 gsi);
4155 vargs.safe_push (gimple_assign_lhs (new_stmt));
4156 }
4157 }
4158 }
4159 break;
4160 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4161 vargs.safe_push (op);
4162 break;
4163 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4164 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4165 if (j == 0)
4166 {
4167 gimple_seq stmts;
4168 arginfo[i].op
4169 = force_gimple_operand (arginfo[i].op, &stmts, true,
4170 NULL_TREE);
4171 if (stmts != NULL)
4172 {
4173 basic_block new_bb;
4174 edge pe = loop_preheader_edge (loop);
4175 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4176 gcc_assert (!new_bb);
4177 }
4178 if (arginfo[i].simd_lane_linear)
4179 {
4180 vargs.safe_push (arginfo[i].op);
4181 break;
4182 }
4183 tree phi_res = copy_ssa_name (op);
4184 gphi *new_phi = create_phi_node (phi_res, loop->header);
4185 loop_vinfo->add_stmt (new_phi);
4186 add_phi_arg (new_phi, arginfo[i].op,
4187 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4188 enum tree_code code
4189 = POINTER_TYPE_P (TREE_TYPE (op))
4190 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4191 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4192 ? sizetype : TREE_TYPE (op);
4193 widest_int cst
4194 = wi::mul (bestn->simdclone->args[i].linear_step,
4195 ncopies * nunits);
4196 tree tcst = wide_int_to_tree (type, cst);
4197 tree phi_arg = copy_ssa_name (op);
4198 gassign *new_stmt
4199 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4200 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4201 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4202 loop_vinfo->add_stmt (new_stmt);
4203 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4204 UNKNOWN_LOCATION);
4205 arginfo[i].op = phi_res;
4206 vargs.safe_push (phi_res);
4207 }
4208 else
4209 {
4210 enum tree_code code
4211 = POINTER_TYPE_P (TREE_TYPE (op))
4212 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4213 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4214 ? sizetype : TREE_TYPE (op);
4215 widest_int cst
4216 = wi::mul (bestn->simdclone->args[i].linear_step,
4217 j * nunits);
4218 tree tcst = wide_int_to_tree (type, cst);
4219 new_temp = make_ssa_name (TREE_TYPE (op));
4220 gassign *new_stmt
4221 = gimple_build_assign (new_temp, code,
4222 arginfo[i].op, tcst);
4223 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4224 vargs.safe_push (new_temp);
4225 }
4226 break;
4227 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4228 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4229 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4230 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4231 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4232 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4233 default:
4234 gcc_unreachable ();
4235 }
4236 }
4237
4238 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4239 if (vec_dest)
4240 {
4241 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4242 if (ratype)
4243 new_temp = create_tmp_var (ratype);
4244 else if (simd_clone_subparts (vectype)
4245 == simd_clone_subparts (rtype))
4246 new_temp = make_ssa_name (vec_dest, new_call);
4247 else
4248 new_temp = make_ssa_name (rtype, new_call);
4249 gimple_call_set_lhs (new_call, new_temp);
4250 }
4251 stmt_vec_info new_stmt_info
4252 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4253
4254 if (vec_dest)
4255 {
4256 if (simd_clone_subparts (vectype) < nunits)
4257 {
4258 unsigned int k, l;
4259 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4260 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4261 k = nunits / simd_clone_subparts (vectype);
4262 gcc_assert ((k & (k - 1)) == 0);
4263 for (l = 0; l < k; l++)
4264 {
4265 tree t;
4266 if (ratype)
4267 {
4268 t = build_fold_addr_expr (new_temp);
4269 t = build2 (MEM_REF, vectype, t,
4270 build_int_cst (TREE_TYPE (t), l * bytes));
4271 }
4272 else
4273 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4274 bitsize_int (prec), bitsize_int (l * prec));
4275 gimple *new_stmt
4276 = gimple_build_assign (make_ssa_name (vectype), t);
4277 new_stmt_info
4278 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4279
4280 if (j == 0 && l == 0)
4281 STMT_VINFO_VEC_STMT (stmt_info)
4282 = *vec_stmt = new_stmt_info;
4283 else
4284 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4285
4286 prev_stmt_info = new_stmt_info;
4287 }
4288
4289 if (ratype)
4290 vect_clobber_variable (stmt_info, gsi, new_temp);
4291 continue;
4292 }
4293 else if (simd_clone_subparts (vectype) > nunits)
4294 {
4295 unsigned int k = (simd_clone_subparts (vectype)
4296 / simd_clone_subparts (rtype));
4297 gcc_assert ((k & (k - 1)) == 0);
4298 if ((j & (k - 1)) == 0)
4299 vec_alloc (ret_ctor_elts, k);
4300 if (ratype)
4301 {
4302 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4303 for (m = 0; m < o; m++)
4304 {
4305 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4306 size_int (m), NULL_TREE, NULL_TREE);
4307 gimple *new_stmt
4308 = gimple_build_assign (make_ssa_name (rtype), tem);
4309 new_stmt_info
4310 = vect_finish_stmt_generation (stmt_info, new_stmt,
4311 gsi);
4312 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4313 gimple_assign_lhs (new_stmt));
4314 }
4315 vect_clobber_variable (stmt_info, gsi, new_temp);
4316 }
4317 else
4318 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4319 if ((j & (k - 1)) != k - 1)
4320 continue;
4321 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4322 gimple *new_stmt
4323 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4324 new_stmt_info
4325 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4326
4327 if ((unsigned) j == k - 1)
4328 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4329 else
4330 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4331
4332 prev_stmt_info = new_stmt_info;
4333 continue;
4334 }
4335 else if (ratype)
4336 {
4337 tree t = build_fold_addr_expr (new_temp);
4338 t = build2 (MEM_REF, vectype, t,
4339 build_int_cst (TREE_TYPE (t), 0));
4340 gimple *new_stmt
4341 = gimple_build_assign (make_ssa_name (vec_dest), t);
4342 new_stmt_info
4343 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4344 vect_clobber_variable (stmt_info, gsi, new_temp);
4345 }
4346 }
4347
4348 if (j == 0)
4349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4350 else
4351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4352
4353 prev_stmt_info = new_stmt_info;
4354 }
4355
4356 vargs.release ();
4357
4358 /* The call in STMT might prevent it from being removed in DCE.
4359 We cannot, however, remove it here, due to the way the SSA name
4360 it defines is mapped to the new definition. So just replace the
4361 rhs of the statement with something harmless. */
4362
4363 if (slp_node)
4364 return true;
4365
4366 gimple *new_stmt;
4367 if (scalar_dest)
4368 {
4369 type = TREE_TYPE (scalar_dest);
4370 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4371 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4372 }
4373 else
4374 new_stmt = gimple_build_nop ();
4375 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4376 unlink_stmt_vdef (stmt);
4377
4378 return true;
4379 }
4380
4381
4382 /* Function vect_gen_widened_results_half
4383
4384 Create a vector stmt whose code, number of operands, and result
4385 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4386 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4387 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4388 needs to be created (DECL is a function-decl of a target-builtin).
4389 STMT_INFO is the original scalar stmt that we are vectorizing. */
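/* For instance (illustrative): when widening a multiplication of V8HI
operands into V4SI results, this helper is invoked twice, once with
VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR, each call
producing the vector that holds one half of the widened elements. */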
4390
4391 static gimple *
4392 vect_gen_widened_results_half (enum tree_code code,
4393 tree decl,
4394 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4395 tree vec_dest, gimple_stmt_iterator *gsi,
4396 stmt_vec_info stmt_info)
4397 {
4398 gimple *new_stmt;
4399 tree new_temp;
4400
4401 /* Generate half of the widened result: */
4402 if (code == CALL_EXPR)
4403 {
4404 /* Target specific support */
4405 if (op_type == binary_op)
4406 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4407 else
4408 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4409 new_temp = make_ssa_name (vec_dest, new_stmt);
4410 gimple_call_set_lhs (new_stmt, new_temp);
4411 }
4412 else
4413 {
4414 /* Generic support */
4415 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4416 if (op_type != binary_op)
4417 vec_oprnd1 = NULL;
4418 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4419 new_temp = make_ssa_name (vec_dest, new_stmt);
4420 gimple_assign_set_lhs (new_stmt, new_temp);
4421 }
4422 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4423
4424 return new_stmt;
4425 }
4426
4427
4428 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4429 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4430 containing the scalar operand), and for the rest we get a copy with
4431 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4432 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4433 The vectors are collected into VEC_OPRNDS. */
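/* E.g. (illustrative) with MULTI_STEP_CVT == 1 this collects four vector
defs in VEC_OPRNDS: the def created (or copied) for the first operand,
its stmt copy, and then two further stmt copies from the recursive
call. */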
4434
4435 static void
4436 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4437 vec<tree> *vec_oprnds, int multi_step_cvt)
4438 {
4439 vec_info *vinfo = stmt_info->vinfo;
4440 tree vec_oprnd;
4441
4442 /* Get first vector operand. */
4443 /* All the vector operands except the very first one (that is the scalar
4444 oprnd) are stmt copies. */
4445 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4446 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4447 else
4448 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4449
4450 vec_oprnds->quick_push (vec_oprnd);
4451
4452 /* Get second vector operand. */
4453 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4454 vec_oprnds->quick_push (vec_oprnd);
4455
4456 *oprnd = vec_oprnd;
4457
4458 /* For conversion in multiple steps, continue to get operands
4459 recursively. */
4460 if (multi_step_cvt)
4461 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4462 multi_step_cvt - 1);
4463 }
4464
4465
4466 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4467 For multi-step conversions store the resulting vectors and call the function
4468 recursively. */
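/* Illustrative example: narrowing int elements to chars, four V4SI operand
vectors are first packed pairwise into two V8HI vectors, and the recursive
call then packs those into one V16QI vector (VEC_PACK_TRUNC_EXPR at each
level). */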
4469
4470 static void
4471 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4472 int multi_step_cvt,
4473 stmt_vec_info stmt_info,
4474 vec<tree> vec_dsts,
4475 gimple_stmt_iterator *gsi,
4476 slp_tree slp_node, enum tree_code code,
4477 stmt_vec_info *prev_stmt_info)
4478 {
4479 unsigned int i;
4480 tree vop0, vop1, new_tmp, vec_dest;
4481
4482 vec_dest = vec_dsts.pop ();
4483
4484 for (i = 0; i < vec_oprnds->length (); i += 2)
4485 {
4486 /* Create demotion operation. */
4487 vop0 = (*vec_oprnds)[i];
4488 vop1 = (*vec_oprnds)[i + 1];
4489 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4490 new_tmp = make_ssa_name (vec_dest, new_stmt);
4491 gimple_assign_set_lhs (new_stmt, new_tmp);
4492 stmt_vec_info new_stmt_info
4493 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4494
4495 if (multi_step_cvt)
4496 /* Store the resulting vector for next recursive call. */
4497 (*vec_oprnds)[i/2] = new_tmp;
4498 else
4499 {
4500 /* This is the last step of the conversion sequence. Store the
4501 vectors in SLP_NODE or in the vector info of the scalar statement
4502 (or in the STMT_VINFO_RELATED_STMT chain). */
4503 if (slp_node)
4504 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4505 else
4506 {
4507 if (!*prev_stmt_info)
4508 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4509 else
4510 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4511
4512 *prev_stmt_info = new_stmt_info;
4513 }
4514 }
4515 }
4516
4517 /* For multi-step demotion operations we first generate demotion operations
4518 from the source type to the intermediate types, and then combine the
4519 results (stored in VEC_OPRNDS) with a demotion operation to the
4520 destination type. */
4521 if (multi_step_cvt)
4522 {
4523 /* At each level of recursion we have half of the operands we had at the
4524 previous level. */
4525 vec_oprnds->truncate ((i+1)/2);
4526 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4527 stmt_info, vec_dsts, gsi,
4528 slp_node, VEC_PACK_TRUNC_EXPR,
4529 prev_stmt_info);
4530 }
4531
4532 vec_dsts.quick_push (vec_dest);
4533 }
4534
4535
4536 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4537 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4538 STMT_INFO. For multi-step conversions store the resulting vectors and
4539 call the function recursively. */
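/* For instance (a sketch): promoting two V8HI operand vectors for a unary
conversion replaces *VEC_OPRNDS0 = { v0, v1 } with the four V4SI half
results { lo(v0), hi(v0), lo(v1), hi(v1) }, produced with
VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR. */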
4540
4541 static void
4542 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4543 vec<tree> *vec_oprnds1,
4544 stmt_vec_info stmt_info, tree vec_dest,
4545 gimple_stmt_iterator *gsi,
4546 enum tree_code code1,
4547 enum tree_code code2, tree decl1,
4548 tree decl2, int op_type)
4549 {
4550 int i;
4551 tree vop0, vop1, new_tmp1, new_tmp2;
4552 gimple *new_stmt1, *new_stmt2;
4553 vec<tree> vec_tmp = vNULL;
4554
4555 vec_tmp.create (vec_oprnds0->length () * 2);
4556 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4557 {
4558 if (op_type == binary_op)
4559 vop1 = (*vec_oprnds1)[i];
4560 else
4561 vop1 = NULL_TREE;
4562
4563 /* Generate the two halves of promotion operation. */
4564 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4565 op_type, vec_dest, gsi,
4566 stmt_info);
4567 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4568 op_type, vec_dest, gsi,
4569 stmt_info);
4570 if (is_gimple_call (new_stmt1))
4571 {
4572 new_tmp1 = gimple_call_lhs (new_stmt1);
4573 new_tmp2 = gimple_call_lhs (new_stmt2);
4574 }
4575 else
4576 {
4577 new_tmp1 = gimple_assign_lhs (new_stmt1);
4578 new_tmp2 = gimple_assign_lhs (new_stmt2);
4579 }
4580
4581 /* Store the results for the next step. */
4582 vec_tmp.quick_push (new_tmp1);
4583 vec_tmp.quick_push (new_tmp2);
4584 }
4585
4586 vec_oprnds0->release ();
4587 *vec_oprnds0 = vec_tmp;
4588 }
4589
4590
4591 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4592 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4593 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4594 Return true if STMT_INFO is vectorizable in this way. */
4595
4596 static bool
4597 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4598 stmt_vec_info *vec_stmt, slp_tree slp_node,
4599 stmt_vector_for_cost *cost_vec)
4600 {
4601 tree vec_dest;
4602 tree scalar_dest;
4603 tree op0, op1 = NULL_TREE;
4604 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4605 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4606 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4607 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4608 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4609 tree new_temp;
4610 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4611 int ndts = 2;
4612 stmt_vec_info prev_stmt_info;
4613 poly_uint64 nunits_in;
4614 poly_uint64 nunits_out;
4615 tree vectype_out, vectype_in;
4616 int ncopies, i, j;
4617 tree lhs_type, rhs_type;
4618 enum { NARROW, NONE, WIDEN } modifier;
4619 vec<tree> vec_oprnds0 = vNULL;
4620 vec<tree> vec_oprnds1 = vNULL;
4621 tree vop0;
4622 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4623 vec_info *vinfo = stmt_info->vinfo;
4624 int multi_step_cvt = 0;
4625 vec<tree> interm_types = vNULL;
4626 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4627 int op_type;
4628 unsigned short fltsz;
4629
4630 /* Is STMT a vectorizable conversion? */
4631
4632 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4633 return false;
4634
4635 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4636 && ! vec_stmt)
4637 return false;
4638
4639 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4640 if (!stmt)
4641 return false;
4642
4643 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4644 return false;
4645
4646 code = gimple_assign_rhs_code (stmt);
4647 if (!CONVERT_EXPR_CODE_P (code)
4648 && code != FIX_TRUNC_EXPR
4649 && code != FLOAT_EXPR
4650 && code != WIDEN_MULT_EXPR
4651 && code != WIDEN_LSHIFT_EXPR)
4652 return false;
4653
4654 op_type = TREE_CODE_LENGTH (code);
4655
4656 /* Check types of lhs and rhs. */
4657 scalar_dest = gimple_assign_lhs (stmt);
4658 lhs_type = TREE_TYPE (scalar_dest);
4659 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4660
4661 op0 = gimple_assign_rhs1 (stmt);
4662 rhs_type = TREE_TYPE (op0);
4663
4664 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4665 && !((INTEGRAL_TYPE_P (lhs_type)
4666 && INTEGRAL_TYPE_P (rhs_type))
4667 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4668 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4669 return false;
4670
4671 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4672 && ((INTEGRAL_TYPE_P (lhs_type)
4673 && !type_has_mode_precision_p (lhs_type))
4674 || (INTEGRAL_TYPE_P (rhs_type)
4675 && !type_has_mode_precision_p (rhs_type))))
4676 {
4677 if (dump_enabled_p ())
4678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4679 "type conversion to/from bit-precision unsupported."
4680 "\n");
4681 return false;
4682 }
4683
4684 /* Check the operands of the operation. */
4685 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4686 {
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4689 "use not simple.\n");
4690 return false;
4691 }
4692 if (op_type == binary_op)
4693 {
4694 bool ok;
4695
4696 op1 = gimple_assign_rhs2 (stmt);
4697 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4698 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4699 OP1. */
4700 if (CONSTANT_CLASS_P (op0))
4701 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4702 else
4703 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4704
4705 if (!ok)
4706 {
4707 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4709 "use not simple.\n");
4710 return false;
4711 }
4712 }
4713
4714 /* If op0 is an external or constant def, use a vector type of
4715 the same size as the output vector type. */
4716 if (!vectype_in)
4717 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4718 if (vec_stmt)
4719 gcc_assert (vectype_in);
4720 if (!vectype_in)
4721 {
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4724 "no vectype for scalar type %T\n", rhs_type);
4725
4726 return false;
4727 }
4728
4729 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4730 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4731 {
4732 if (dump_enabled_p ())
4733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4734 "can't convert between boolean and non "
4735 "boolean vectors %T\n", rhs_type);
4736
4737 return false;
4738 }
4739
4740 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4741 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4742 if (known_eq (nunits_out, nunits_in))
4743 modifier = NONE;
4744 else if (multiple_p (nunits_out, nunits_in))
4745 modifier = NARROW;
4746 else
4747 {
4748 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4749 modifier = WIDEN;
4750 }
4751
4752 /* Multiple types in SLP are handled by creating the appropriate number of
4753 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4754 case of SLP. */
4755 if (slp_node)
4756 ncopies = 1;
4757 else if (modifier == NARROW)
4758 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4759 else
4760 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4761
4762 /* Sanity check: make sure that at least one copy of the vectorized stmt
4763 needs to be generated. */
4764 gcc_assert (ncopies >= 1);
4765
4766 bool found_mode = false;
4767 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4768 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4769 opt_scalar_mode rhs_mode_iter;
4770
4771 /* Supportable by target? */
4772 switch (modifier)
4773 {
4774 case NONE:
4775 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4776 return false;
4777 if (supportable_convert_operation (code, vectype_out, vectype_in,
4778 &decl1, &code1))
4779 break;
4780 /* FALLTHRU */
4781 unsupported:
4782 if (dump_enabled_p ())
4783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4784 "conversion not supported by target.\n");
4785 return false;
4786
4787 case WIDEN:
4788 if (supportable_widening_operation (code, stmt_info, vectype_out,
4789 vectype_in, &code1, &code2,
4790 &multi_step_cvt, &interm_types))
4791 {
4792 /* Binary widening operation can only be supported directly by the
4793 architecture. */
4794 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4795 break;
4796 }
4797
4798 if (code != FLOAT_EXPR
4799 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4800 goto unsupported;
4801
4802 fltsz = GET_MODE_SIZE (lhs_mode);
4803 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4804 {
4805 rhs_mode = rhs_mode_iter.require ();
4806 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4807 break;
4808
4809 cvt_type
4810 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4811 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4812 if (cvt_type == NULL_TREE)
4813 goto unsupported;
4814
4815 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4816 {
4817 if (!supportable_convert_operation (code, vectype_out,
4818 cvt_type, &decl1, &codecvt1))
4819 goto unsupported;
4820 }
4821 else if (!supportable_widening_operation (code, stmt_info,
4822 vectype_out, cvt_type,
4823 &codecvt1, &codecvt2,
4824 &multi_step_cvt,
4825 &interm_types))
4826 continue;
4827 else
4828 gcc_assert (multi_step_cvt == 0);
4829
4830 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4831 vectype_in, &code1, &code2,
4832 &multi_step_cvt, &interm_types))
4833 {
4834 found_mode = true;
4835 break;
4836 }
4837 }
4838
4839 if (!found_mode)
4840 goto unsupported;
4841
4842 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4843 codecvt2 = ERROR_MARK;
4844 else
4845 {
4846 multi_step_cvt++;
4847 interm_types.safe_push (cvt_type);
4848 cvt_type = NULL_TREE;
4849 }
4850 break;
4851
4852 case NARROW:
4853 gcc_assert (op_type == unary_op);
4854 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4855 &code1, &multi_step_cvt,
4856 &interm_types))
4857 break;
4858
4859 if (code != FIX_TRUNC_EXPR
4860 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4861 goto unsupported;
4862
4863 cvt_type
4864 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4865 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4866 if (cvt_type == NULL_TREE)
4867 goto unsupported;
4868 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4869 &decl1, &codecvt1))
4870 goto unsupported;
4871 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4872 &code1, &multi_step_cvt,
4873 &interm_types))
4874 break;
4875 goto unsupported;
4876
4877 default:
4878 gcc_unreachable ();
4879 }
4880
4881 if (!vec_stmt) /* transformation not required. */
4882 {
4883 DUMP_VECT_SCOPE ("vectorizable_conversion");
4884 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4885 {
4886 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4887 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4888 cost_vec);
4889 }
4890 else if (modifier == NARROW)
4891 {
4892 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4893 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4894 cost_vec);
4895 }
4896 else
4897 {
4898 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4899 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4900 cost_vec);
4901 }
4902 interm_types.release ();
4903 return true;
4904 }
4905
4906 /* Transform. */
4907 if (dump_enabled_p ())
4908 dump_printf_loc (MSG_NOTE, vect_location,
4909 "transform conversion. ncopies = %d.\n", ncopies);
4910
4911 if (op_type == binary_op)
4912 {
4913 if (CONSTANT_CLASS_P (op0))
4914 op0 = fold_convert (TREE_TYPE (op1), op0);
4915 else if (CONSTANT_CLASS_P (op1))
4916 op1 = fold_convert (TREE_TYPE (op0), op1);
4917 }
4918
4919 /* In case of multi-step conversion, we first generate conversion operations
4920 to the intermediate types, and then from those types to the final one.
4921 We create vector destinations for the intermediate types (TYPES) received
4922 from supportable_*_operation, and store them in the correct order
4923 for future use in vect_create_vectorized_*_stmts (). */
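/* For example (illustrative): a short -> double conversion under WIDEN may
go through an intermediate int vector type, the values are first widened
short -> int and then converted int -> double, so destinations are created
for both the intermediate and the final vector type. */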
4924 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4925 vec_dest = vect_create_destination_var (scalar_dest,
4926 (cvt_type && modifier == WIDEN)
4927 ? cvt_type : vectype_out);
4928 vec_dsts.quick_push (vec_dest);
4929
4930 if (multi_step_cvt)
4931 {
4932 for (i = interm_types.length () - 1;
4933 interm_types.iterate (i, &intermediate_type); i--)
4934 {
4935 vec_dest = vect_create_destination_var (scalar_dest,
4936 intermediate_type);
4937 vec_dsts.quick_push (vec_dest);
4938 }
4939 }
4940
4941 if (cvt_type)
4942 vec_dest = vect_create_destination_var (scalar_dest,
4943 modifier == WIDEN
4944 ? vectype_out : cvt_type);
4945
4946 if (!slp_node)
4947 {
4948 if (modifier == WIDEN)
4949 {
4950 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4951 if (op_type == binary_op)
4952 vec_oprnds1.create (1);
4953 }
4954 else if (modifier == NARROW)
4955 vec_oprnds0.create (
4956 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4957 }
4958 else if (code == WIDEN_LSHIFT_EXPR)
4959 vec_oprnds1.create (slp_node->vec_stmts_size);
4960
4961 last_oprnd = op0;
4962 prev_stmt_info = NULL;
4963 switch (modifier)
4964 {
4965 case NONE:
4966 for (j = 0; j < ncopies; j++)
4967 {
4968 if (j == 0)
4969 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4970 NULL, slp_node);
4971 else
4972 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
4973
4974 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4975 {
4976 stmt_vec_info new_stmt_info;
4977 /* Arguments are ready, create the new vector stmt. */
4978 if (code1 == CALL_EXPR)
4979 {
4980 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4981 new_temp = make_ssa_name (vec_dest, new_stmt);
4982 gimple_call_set_lhs (new_stmt, new_temp);
4983 new_stmt_info
4984 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4985 }
4986 else
4987 {
4988 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4989 gassign *new_stmt
4990 = gimple_build_assign (vec_dest, code1, vop0);
4991 new_temp = make_ssa_name (vec_dest, new_stmt);
4992 gimple_assign_set_lhs (new_stmt, new_temp);
4993 new_stmt_info
4994 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4995 }
4996
4997 if (slp_node)
4998 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4999 else
5000 {
5001 if (!prev_stmt_info)
5002 STMT_VINFO_VEC_STMT (stmt_info)
5003 = *vec_stmt = new_stmt_info;
5004 else
5005 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5006 prev_stmt_info = new_stmt_info;
5007 }
5008 }
5009 }
5010 break;
5011
5012 case WIDEN:
5013 /* In case the vectorization factor (VF) is bigger than the number
5014 of elements that we can fit in a vectype (nunits), we have to
5015 generate more than one vector stmt, i.e., we need to "unroll"
5016 the vector stmt by a factor of VF/nunits. */
5017 for (j = 0; j < ncopies; j++)
5018 {
5019 /* Handle uses. */
5020 if (j == 0)
5021 {
5022 if (slp_node)
5023 {
5024 if (code == WIDEN_LSHIFT_EXPR)
5025 {
5026 unsigned int k;
5027
5028 vec_oprnd1 = op1;
5029 /* Store vec_oprnd1 for every vector stmt to be created
5030 for SLP_NODE. We check during the analysis that all
5031 the shift arguments are the same. */
5032 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5033 vec_oprnds1.quick_push (vec_oprnd1);
5034
5035 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5036 &vec_oprnds0, NULL, slp_node);
5037 }
5038 else
5039 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5040 &vec_oprnds1, slp_node);
5041 }
5042 else
5043 {
5044 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5045 vec_oprnds0.quick_push (vec_oprnd0);
5046 if (op_type == binary_op)
5047 {
5048 if (code == WIDEN_LSHIFT_EXPR)
5049 vec_oprnd1 = op1;
5050 else
5051 vec_oprnd1
5052 = vect_get_vec_def_for_operand (op1, stmt_info);
5053 vec_oprnds1.quick_push (vec_oprnd1);
5054 }
5055 }
5056 }
5057 else
5058 {
5059 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5060 vec_oprnds0.truncate (0);
5061 vec_oprnds0.quick_push (vec_oprnd0);
5062 if (op_type == binary_op)
5063 {
5064 if (code == WIDEN_LSHIFT_EXPR)
5065 vec_oprnd1 = op1;
5066 else
5067 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5068 vec_oprnd1);
5069 vec_oprnds1.truncate (0);
5070 vec_oprnds1.quick_push (vec_oprnd1);
5071 }
5072 }
5073
5074 /* Arguments are ready. Create the new vector stmts. */
5075 for (i = multi_step_cvt; i >= 0; i--)
5076 {
5077 tree this_dest = vec_dsts[i];
5078 enum tree_code c1 = code1, c2 = code2;
5079 if (i == 0 && codecvt2 != ERROR_MARK)
5080 {
5081 c1 = codecvt1;
5082 c2 = codecvt2;
5083 }
5084 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5085 &vec_oprnds1, stmt_info,
5086 this_dest, gsi,
5087 c1, c2, decl1, decl2,
5088 op_type);
5089 }
5090
5091 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5092 {
5093 stmt_vec_info new_stmt_info;
5094 if (cvt_type)
5095 {
5096 if (codecvt1 == CALL_EXPR)
5097 {
5098 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5099 new_temp = make_ssa_name (vec_dest, new_stmt);
5100 gimple_call_set_lhs (new_stmt, new_temp);
5101 new_stmt_info
5102 = vect_finish_stmt_generation (stmt_info, new_stmt,
5103 gsi);
5104 }
5105 else
5106 {
5107 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5108 new_temp = make_ssa_name (vec_dest);
5109 gassign *new_stmt
5110 = gimple_build_assign (new_temp, codecvt1, vop0);
5111 new_stmt_info
5112 = vect_finish_stmt_generation (stmt_info, new_stmt,
5113 gsi);
5114 }
5115 }
5116 else
5117 new_stmt_info = vinfo->lookup_def (vop0);
5118
5119 if (slp_node)
5120 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5121 else
5122 {
5123 if (!prev_stmt_info)
5124 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5125 else
5126 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5127 prev_stmt_info = new_stmt_info;
5128 }
5129 }
5130 }
5131
5132 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5133 break;
5134
5135 case NARROW:
5136 /* In case the vectorization factor (VF) is bigger than the number
5137 of elements that we can fit in a vectype (nunits), we have to
5138 generate more than one vector stmt, i.e. we need to "unroll"
5139 the vector stmt by a factor of VF/nunits. */
5140 for (j = 0; j < ncopies; j++)
5141 {
5142 /* Handle uses. */
5143 if (slp_node)
5144 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5145 slp_node);
5146 else
5147 {
5148 vec_oprnds0.truncate (0);
5149 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5150 vect_pow2 (multi_step_cvt) - 1);
5151 }
5152
5153 /* Arguments are ready. Create the new vector stmts. */
5154 if (cvt_type)
5155 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5156 {
5157 if (codecvt1 == CALL_EXPR)
5158 {
5159 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5160 new_temp = make_ssa_name (vec_dest, new_stmt);
5161 gimple_call_set_lhs (new_stmt, new_temp);
5162 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5163 }
5164 else
5165 {
5166 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5167 new_temp = make_ssa_name (vec_dest);
5168 gassign *new_stmt
5169 = gimple_build_assign (new_temp, codecvt1, vop0);
5170 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5171 }
5172
5173 vec_oprnds0[i] = new_temp;
5174 }
5175
5176 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5177 stmt_info, vec_dsts, gsi,
5178 slp_node, code1,
5179 &prev_stmt_info);
5180 }
5181
5182 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5183 break;
5184 }
5185
5186 vec_oprnds0.release ();
5187 vec_oprnds1.release ();
5188 interm_types.release ();
5189
5190 return true;
5191 }
5192
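/* A minimal illustrative sketch, not used by the vectorizer itself: scalar
   loops of the shapes classified above as WIDEN and NARROW conversions.
   The function and array names are hypothetical.  */
static void
example_widen_narrow_conversions (int *wi, const short *ws,
				  short *ns, const int *ni, int n)
{
  for (int i = 0; i < n; i++)
    wi[i] = ws[i];	/* short -> int: a WIDEN conversion; each input
			   vector produces two wider output vectors.  */
  for (int i = 0; i < n; i++)
    ns[i] = ni[i];	/* int -> short: a NARROW conversion; two input
			   vectors are packed into one output vector.  */
}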
5193
5194 /* Function vectorizable_assignment.
5195
5196 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5197 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5198 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5199 Return true if STMT_INFO is vectorizable in this way. */
5200
5201 static bool
5202 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5203 stmt_vec_info *vec_stmt, slp_tree slp_node,
5204 stmt_vector_for_cost *cost_vec)
5205 {
5206 tree vec_dest;
5207 tree scalar_dest;
5208 tree op;
5209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5210 tree new_temp;
5211 enum vect_def_type dt[1] = {vect_unknown_def_type};
5212 int ndts = 1;
5213 int ncopies;
5214 int i, j;
5215 vec<tree> vec_oprnds = vNULL;
5216 tree vop;
5217 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5218 vec_info *vinfo = stmt_info->vinfo;
5219 stmt_vec_info prev_stmt_info = NULL;
5220 enum tree_code code;
5221 tree vectype_in;
5222
5223 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5224 return false;
5225
5226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5227 && ! vec_stmt)
5228 return false;
5229
5230 /* Is STMT_INFO a vectorizable assignment? */
5231 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5232 if (!stmt)
5233 return false;
5234
5235 scalar_dest = gimple_assign_lhs (stmt);
5236 if (TREE_CODE (scalar_dest) != SSA_NAME)
5237 return false;
5238
5239 code = gimple_assign_rhs_code (stmt);
5240 if (gimple_assign_single_p (stmt)
5241 || code == PAREN_EXPR
5242 || CONVERT_EXPR_CODE_P (code))
5243 op = gimple_assign_rhs1 (stmt);
5244 else
5245 return false;
5246
5247 if (code == VIEW_CONVERT_EXPR)
5248 op = TREE_OPERAND (op, 0);
5249
5250 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5251 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5252
5253 /* Multiple types in SLP are handled by creating the appropriate number of
5254 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5255 case of SLP. */
5256 if (slp_node)
5257 ncopies = 1;
5258 else
5259 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5260
5261 gcc_assert (ncopies >= 1);
5262
5263 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5264 {
5265 if (dump_enabled_p ())
5266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5267 "use not simple.\n");
5268 return false;
5269 }
5270
5271 /* We can handle NOP_EXPR/CONVERT_EXPR and VIEW_CONVERT_EXPR conversions
5272 that do not change the number of elements or the vector size. */
5273 if ((CONVERT_EXPR_CODE_P (code)
5274 || code == VIEW_CONVERT_EXPR)
5275 && (!vectype_in
5276 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5277 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5278 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5279 return false;
5280
5281 /* We do not handle bit-precision changes. */
5282 if ((CONVERT_EXPR_CODE_P (code)
5283 || code == VIEW_CONVERT_EXPR)
5284 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5285 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5286 || !type_has_mode_precision_p (TREE_TYPE (op)))
5287 /* But a conversion that does not change the bit-pattern is ok. */
5288 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5289 > TYPE_PRECISION (TREE_TYPE (op)))
5290 && TYPE_UNSIGNED (TREE_TYPE (op)))
5291 /* Conversion between boolean types of different sizes is
5292 a simple assignment in case their vectypes are the same
5293 boolean vectors. */
5294 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5295 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5296 {
5297 if (dump_enabled_p ())
5298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5299 "type conversion to/from bit-precision "
5300 "unsupported.\n");
5301 return false;
5302 }
5303
5304 if (!vec_stmt) /* transformation not required. */
5305 {
5306 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5307 DUMP_VECT_SCOPE ("vectorizable_assignment");
5308 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5309 return true;
5310 }
5311
5312 /* Transform. */
5313 if (dump_enabled_p ())
5314 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5315
5316 /* Handle def. */
5317 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5318
5319 /* Handle use. */
5320 for (j = 0; j < ncopies; j++)
5321 {
5322 /* Handle uses. */
5323 if (j == 0)
5324 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5325 else
5326 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5327
5328 /* Arguments are ready. Create the new vector stmt. */
5329 stmt_vec_info new_stmt_info = NULL;
5330 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5331 {
5332 if (CONVERT_EXPR_CODE_P (code)
5333 || code == VIEW_CONVERT_EXPR)
5334 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5335 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5336 new_temp = make_ssa_name (vec_dest, new_stmt);
5337 gimple_assign_set_lhs (new_stmt, new_temp);
5338 new_stmt_info
5339 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5340 if (slp_node)
5341 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5342 }
5343
5344 if (slp_node)
5345 continue;
5346
5347 if (j == 0)
5348 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5349 else
5350 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5351
5352 prev_stmt_info = new_stmt_info;
5353 }
5354
5355 vec_oprnds.release ();
5356 return true;
5357 }
5358
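/* A minimal illustrative sketch, not used by the vectorizer itself: the
   kind of copy that vectorizable_assignment handles -- a conversion that
   changes neither the number of vector elements nor the vector size, so
   it becomes a simple (VIEW_CONVERT-style) vector copy.  Names are
   hypothetical.  */
static void
example_vectorizable_assignment (unsigned int *dst, const int *src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (unsigned int) src[i];	/* same width and element count.  */
}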
5359
5360 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE,
5361 either as a shift by a scalar or as a shift by a vector. */
5362
5363 bool
5364 vect_supportable_shift (enum tree_code code, tree scalar_type)
5365 {
5366
5367 machine_mode vec_mode;
5368 optab optab;
5369 int icode;
5370 tree vectype;
5371
5372 vectype = get_vectype_for_scalar_type (scalar_type);
5373 if (!vectype)
5374 return false;
5375
5376 optab = optab_for_tree_code (code, vectype, optab_scalar);
5377 if (!optab
5378 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5379 {
5380 optab = optab_for_tree_code (code, vectype, optab_vector);
5381 if (!optab
5382 || (optab_handler (optab, TYPE_MODE (vectype))
5383 == CODE_FOR_nothing))
5384 return false;
5385 }
5386
5387 vec_mode = TYPE_MODE (vectype);
5388 icode = (int) optab_handler (optab, vec_mode);
5389 if (icode == CODE_FOR_nothing)
5390 return false;
5391
5392 return true;
5393 }
5394
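/* A minimal illustrative sketch, not used by the vectorizer itself: the two
   shift shapes distinguished above -- a loop-invariant (scalar) shift
   amount, checked first against the vector/scalar optab, and a per-element
   shift amount, which needs the vector/vector optab.  Names are
   hypothetical.  */
static void
example_shift_amounts (int *a, const int *b, const int *amount, int s, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = b[i] << s;		/* invariant amount: vector/scalar shift.  */
  for (int i = 0; i < n; i++)
    a[i] = b[i] >> amount[i];	/* variable amount: vector/vector shift.  */
}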
5395
5396 /* Function vectorizable_shift.
5397
5398 Check if STMT_INFO performs a shift operation that can be vectorized.
5399 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5400 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5401 Return true if STMT_INFO is vectorizable in this way. */
5402
5403 bool
5404 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5405 stmt_vec_info *vec_stmt, slp_tree slp_node,
5406 stmt_vector_for_cost *cost_vec)
5407 {
5408 tree vec_dest;
5409 tree scalar_dest;
5410 tree op0, op1 = NULL;
5411 tree vec_oprnd1 = NULL_TREE;
5412 tree vectype;
5413 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5414 enum tree_code code;
5415 machine_mode vec_mode;
5416 tree new_temp;
5417 optab optab;
5418 int icode;
5419 machine_mode optab_op2_mode;
5420 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5421 int ndts = 2;
5422 stmt_vec_info prev_stmt_info;
5423 poly_uint64 nunits_in;
5424 poly_uint64 nunits_out;
5425 tree vectype_out;
5426 tree op1_vectype;
5427 int ncopies;
5428 int j, i;
5429 vec<tree> vec_oprnds0 = vNULL;
5430 vec<tree> vec_oprnds1 = vNULL;
5431 tree vop0, vop1;
5432 unsigned int k;
5433 bool scalar_shift_arg = true;
5434 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5435 vec_info *vinfo = stmt_info->vinfo;
5436
5437 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5438 return false;
5439
5440 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5441 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5442 && ! vec_stmt)
5443 return false;
5444
5445 /* Is STMT a vectorizable shift/rotate operation? */
5446 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5447 if (!stmt)
5448 return false;
5449
5450 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5451 return false;
5452
5453 code = gimple_assign_rhs_code (stmt);
5454
5455 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5456 || code == RROTATE_EXPR))
5457 return false;
5458
5459 scalar_dest = gimple_assign_lhs (stmt);
5460 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5461 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5462 {
5463 if (dump_enabled_p ())
5464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5465 "bit-precision shifts not supported.\n");
5466 return false;
5467 }
5468
5469 op0 = gimple_assign_rhs1 (stmt);
5470 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5471 {
5472 if (dump_enabled_p ())
5473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5474 "use not simple.\n");
5475 return false;
5476 }
5477 /* If op0 is an external or constant def, use a vector type with
5478 the same size as the output vector type. */
5479 if (!vectype)
5480 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5481 if (vec_stmt)
5482 gcc_assert (vectype);
5483 if (!vectype)
5484 {
5485 if (dump_enabled_p ())
5486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5487 "no vectype for scalar type\n");
5488 return false;
5489 }
5490
5491 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5492 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5493 if (maybe_ne (nunits_out, nunits_in))
5494 return false;
5495
5496 op1 = gimple_assign_rhs2 (stmt);
5497 stmt_vec_info op1_def_stmt_info;
5498 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5499 &op1_def_stmt_info))
5500 {
5501 if (dump_enabled_p ())
5502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5503 "use not simple.\n");
5504 return false;
5505 }
5506
5507 /* Multiple types in SLP are handled by creating the appropriate number of
5508 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5509 case of SLP. */
5510 if (slp_node)
5511 ncopies = 1;
5512 else
5513 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5514
5515 gcc_assert (ncopies >= 1);
5516
5517 /* Determine whether the shift amount is a vector or a scalar. If the
5518 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5519
5520 if ((dt[1] == vect_internal_def
5521 || dt[1] == vect_induction_def
5522 || dt[1] == vect_nested_cycle)
5523 && !slp_node)
5524 scalar_shift_arg = false;
5525 else if (dt[1] == vect_constant_def
5526 || dt[1] == vect_external_def
5527 || dt[1] == vect_internal_def)
5528 {
5529 /* In SLP, we need to check whether the shift count is the same
5530 for all statements; in loops, if it is a constant or invariant,
5531 it is always a scalar shift. */
5532 if (slp_node)
5533 {
5534 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5535 stmt_vec_info slpstmt_info;
5536
5537 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5538 {
5539 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5540 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5541 scalar_shift_arg = false;
5542 }
5543 }
5544
5545 /* If the shift amount is computed by a pattern stmt, we cannot
5546 use the scalar amount directly, so give up and use a vector
5547 shift. */
5548 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5549 scalar_shift_arg = false;
5550 }
5551 else
5552 {
5553 if (dump_enabled_p ())
5554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5555 "operand mode requires invariant argument.\n");
5556 return false;
5557 }
5558
5559 /* Vector shifted by vector. */
5560 if (!scalar_shift_arg)
5561 {
5562 optab = optab_for_tree_code (code, vectype, optab_vector);
5563 if (dump_enabled_p ())
5564 dump_printf_loc (MSG_NOTE, vect_location,
5565 "vector/vector shift/rotate found.\n");
5566
5567 if (!op1_vectype)
5568 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5569 if (op1_vectype == NULL_TREE
5570 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5571 {
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "unusable type for last operand in"
5575 " vector/vector shift/rotate.\n");
5576 return false;
5577 }
5578 }
5579 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5580 see if it has a vector-shifted-by-vector insn. */
5581 else
5582 {
5583 optab = optab_for_tree_code (code, vectype, optab_scalar);
5584 if (optab
5585 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5586 {
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_NOTE, vect_location,
5589 "vector/scalar shift/rotate found.\n");
5590 }
5591 else
5592 {
5593 optab = optab_for_tree_code (code, vectype, optab_vector);
5594 if (optab
5595 && (optab_handler (optab, TYPE_MODE (vectype))
5596 != CODE_FOR_nothing))
5597 {
5598 scalar_shift_arg = false;
5599
5600 if (dump_enabled_p ())
5601 dump_printf_loc (MSG_NOTE, vect_location,
5602 "vector/vector shift/rotate found.\n");
5603
5604 /* Unlike the other binary operators, shifts/rotates have
5605 an rhs of type int rather than the same type as the lhs,
5606 so make sure the scalar is the right type if we are
5607 dealing with vectors of long long/long/short/char. */
5608 if (dt[1] == vect_constant_def)
5609 op1 = fold_convert (TREE_TYPE (vectype), op1);
5610 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5611 TREE_TYPE (op1)))
5612 {
5613 if (slp_node
5614 && TYPE_MODE (TREE_TYPE (vectype))
5615 != TYPE_MODE (TREE_TYPE (op1)))
5616 {
5617 if (dump_enabled_p ())
5618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5619 "unusable type for last operand in"
5620 " vector/vector shift/rotate.\n");
5621 return false;
5622 }
5623 if (vec_stmt && !slp_node)
5624 {
5625 op1 = fold_convert (TREE_TYPE (vectype), op1);
5626 op1 = vect_init_vector (stmt_info, op1,
5627 TREE_TYPE (vectype), NULL);
5628 }
5629 }
5630 }
5631 }
5632 }
5633
5634 /* Supportable by target? */
5635 if (!optab)
5636 {
5637 if (dump_enabled_p ())
5638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5639 "no optab.\n");
5640 return false;
5641 }
5642 vec_mode = TYPE_MODE (vectype);
5643 icode = (int) optab_handler (optab, vec_mode);
5644 if (icode == CODE_FOR_nothing)
5645 {
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5648 "op not supported by target.\n");
5649 /* Check only during analysis. */
5650 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5651 || (!vec_stmt
5652 && !vect_worthwhile_without_simd_p (vinfo, code)))
5653 return false;
5654 if (dump_enabled_p ())
5655 dump_printf_loc (MSG_NOTE, vect_location,
5656 "proceeding using word mode.\n");
5657 }
5658
5659 /* Worthwhile without SIMD support? Check only during analysis. */
5660 if (!vec_stmt
5661 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5662 && !vect_worthwhile_without_simd_p (vinfo, code))
5663 {
5664 if (dump_enabled_p ())
5665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5666 "not worthwhile without SIMD support.\n");
5667 return false;
5668 }
5669
5670 if (!vec_stmt) /* transformation not required. */
5671 {
5672 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5673 DUMP_VECT_SCOPE ("vectorizable_shift");
5674 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5675 return true;
5676 }
5677
5678 /* Transform. */
5679
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_NOTE, vect_location,
5682 "transform binary/unary operation.\n");
5683
5684 /* Handle def. */
5685 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5686
5687 prev_stmt_info = NULL;
5688 for (j = 0; j < ncopies; j++)
5689 {
5690 /* Handle uses. */
5691 if (j == 0)
5692 {
5693 if (scalar_shift_arg)
5694 {
5695 /* Vector shl and shr insn patterns can be defined with a scalar
5696 operand 2 (the shift operand). In this case, use a constant or
5697 loop-invariant op1 directly, without extending it to vector mode
5698 first. */
5699 optab_op2_mode = insn_data[icode].operand[2].mode;
5700 if (!VECTOR_MODE_P (optab_op2_mode))
5701 {
5702 if (dump_enabled_p ())
5703 dump_printf_loc (MSG_NOTE, vect_location,
5704 "operand 1 using scalar mode.\n");
5705 vec_oprnd1 = op1;
5706 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5707 vec_oprnds1.quick_push (vec_oprnd1);
5708 if (slp_node)
5709 {
5710 /* Store vec_oprnd1 for every vector stmt to be created
5711 for SLP_NODE. We check during the analysis that all
5712 the shift arguments are the same.
5713 TODO: Allow different constants for different vector
5714 stmts generated for an SLP instance. */
5715 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5716 vec_oprnds1.quick_push (vec_oprnd1);
5717 }
5718 }
5719 }
5720
5721 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5722 (a special case for certain kinds of vector shifts); otherwise,
5723 operand 1 should be of a vector type (the usual case). */
5724 if (vec_oprnd1)
5725 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5726 slp_node);
5727 else
5728 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5729 slp_node);
5730 }
5731 else
5732 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5733
5734 /* Arguments are ready. Create the new vector stmt. */
5735 stmt_vec_info new_stmt_info = NULL;
5736 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5737 {
5738 vop1 = vec_oprnds1[i];
5739 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5740 new_temp = make_ssa_name (vec_dest, new_stmt);
5741 gimple_assign_set_lhs (new_stmt, new_temp);
5742 new_stmt_info
5743 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5744 if (slp_node)
5745 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5746 }
5747
5748 if (slp_node)
5749 continue;
5750
5751 if (j == 0)
5752 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5753 else
5754 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5755 prev_stmt_info = new_stmt_info;
5756 }
5757
5758 vec_oprnds0.release ();
5759 vec_oprnds1.release ();
5760
5761 return true;
5762 }
5763
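/* A minimal illustrative sketch, not used by the vectorizer itself: in an
   SLP group the scalar-shift path above requires every statement to use
   the same shift amount (checked with operand_equal_p during analysis).
   Names are hypothetical.  */
static void
example_slp_shift_group (int *a, const int *b, int n)
{
  for (int i = 0; i < n; i += 2)
    {
      a[i] = b[i] << 2;		/* Both group members shift by the same      */
      a[i + 1] = b[i + 1] << 2;	/* constant, so the scalar argument is kept.  */
    }
}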
5764
5765 /* Function vectorizable_operation.
5766
5767 Check if STMT_INFO performs a binary, unary or ternary operation that can
5768 be vectorized.
5769 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5770 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5771 Return true if STMT_INFO is vectorizable in this way. */
5772
5773 static bool
5774 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5775 stmt_vec_info *vec_stmt, slp_tree slp_node,
5776 stmt_vector_for_cost *cost_vec)
5777 {
5778 tree vec_dest;
5779 tree scalar_dest;
5780 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5781 tree vectype;
5782 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5783 enum tree_code code, orig_code;
5784 machine_mode vec_mode;
5785 tree new_temp;
5786 int op_type;
5787 optab optab;
5788 bool target_support_p;
5789 enum vect_def_type dt[3]
5790 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5791 int ndts = 3;
5792 stmt_vec_info prev_stmt_info;
5793 poly_uint64 nunits_in;
5794 poly_uint64 nunits_out;
5795 tree vectype_out;
5796 int ncopies;
5797 int j, i;
5798 vec<tree> vec_oprnds0 = vNULL;
5799 vec<tree> vec_oprnds1 = vNULL;
5800 vec<tree> vec_oprnds2 = vNULL;
5801 tree vop0, vop1, vop2;
5802 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5803 vec_info *vinfo = stmt_info->vinfo;
5804
5805 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5806 return false;
5807
5808 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5809 && ! vec_stmt)
5810 return false;
5811
5812 /* Is STMT a vectorizable binary/unary operation? */
5813 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5814 if (!stmt)
5815 return false;
5816
5817 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5818 return false;
5819
5820 orig_code = code = gimple_assign_rhs_code (stmt);
5821
5822 /* For pointer addition and subtraction, we should use the normal
5823 plus and minus for the vector operation. */
5824 if (code == POINTER_PLUS_EXPR)
5825 code = PLUS_EXPR;
5826 if (code == POINTER_DIFF_EXPR)
5827 code = MINUS_EXPR;
5828
5829 /* Support only unary or binary operations. */
5830 op_type = TREE_CODE_LENGTH (code);
5831 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5832 {
5833 if (dump_enabled_p ())
5834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5835 "num. args = %d (not unary/binary/ternary op).\n",
5836 op_type);
5837 return false;
5838 }
5839
5840 scalar_dest = gimple_assign_lhs (stmt);
5841 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5842
5843 /* Most operations cannot handle bit-precision types without extra
5844 truncations. */
5845 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5846 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5847 /* The exceptions are bitwise binary operations. */
5848 && code != BIT_IOR_EXPR
5849 && code != BIT_XOR_EXPR
5850 && code != BIT_AND_EXPR)
5851 {
5852 if (dump_enabled_p ())
5853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5854 "bit-precision arithmetic not supported.\n");
5855 return false;
5856 }
5857
5858 op0 = gimple_assign_rhs1 (stmt);
5859 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5860 {
5861 if (dump_enabled_p ())
5862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5863 "use not simple.\n");
5864 return false;
5865 }
5866 /* If op0 is an external or constant def, use a vector type with
5867 the same size as the output vector type. */
5868 if (!vectype)
5869 {
5870 /* For a boolean type we cannot determine the vectype from an
5871 invariant value (we don't know whether it is a vector
5872 of booleans or a vector of integers). We use the output
5873 vectype because operations on booleans don't change the
5874 type. */
5875 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5876 {
5877 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5878 {
5879 if (dump_enabled_p ())
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "not supported operation on bool value.\n");
5882 return false;
5883 }
5884 vectype = vectype_out;
5885 }
5886 else
5887 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5888 }
5889 if (vec_stmt)
5890 gcc_assert (vectype);
5891 if (!vectype)
5892 {
5893 if (dump_enabled_p ())
5894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5895 "no vectype for scalar type %T\n",
5896 TREE_TYPE (op0));
5897
5898 return false;
5899 }
5900
5901 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5902 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5903 if (maybe_ne (nunits_out, nunits_in))
5904 return false;
5905
5906 if (op_type == binary_op || op_type == ternary_op)
5907 {
5908 op1 = gimple_assign_rhs2 (stmt);
5909 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5910 {
5911 if (dump_enabled_p ())
5912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5913 "use not simple.\n");
5914 return false;
5915 }
5916 }
5917 if (op_type == ternary_op)
5918 {
5919 op2 = gimple_assign_rhs3 (stmt);
5920 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5921 {
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5924 "use not simple.\n");
5925 return false;
5926 }
5927 }
5928
5929 /* Multiple types in SLP are handled by creating the appropriate number of
5930 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5931 case of SLP. */
5932 if (slp_node)
5933 ncopies = 1;
5934 else
5935 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5936
5937 gcc_assert (ncopies >= 1);
5938
5939 /* Shifts are handled in vectorizable_shift (). */
5940 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5941 || code == RROTATE_EXPR)
5942 return false;
5943
5944 /* Supportable by target? */
5945
5946 vec_mode = TYPE_MODE (vectype);
5947 if (code == MULT_HIGHPART_EXPR)
5948 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5949 else
5950 {
5951 optab = optab_for_tree_code (code, vectype, optab_default);
5952 if (!optab)
5953 {
5954 if (dump_enabled_p ())
5955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5956 "no optab.\n");
5957 return false;
5958 }
5959 target_support_p = (optab_handler (optab, vec_mode)
5960 != CODE_FOR_nothing);
5961 }
5962
5963 if (!target_support_p)
5964 {
5965 if (dump_enabled_p ())
5966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5967 "op not supported by target.\n");
5968 /* Check only during analysis. */
5969 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5970 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5971 return false;
5972 if (dump_enabled_p ())
5973 dump_printf_loc (MSG_NOTE, vect_location,
5974 "proceeding using word mode.\n");
5975 }
5976
5977 /* Worthwhile without SIMD support? Check only during analysis. */
5978 if (!VECTOR_MODE_P (vec_mode)
5979 && !vec_stmt
5980 && !vect_worthwhile_without_simd_p (vinfo, code))
5981 {
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5984 "not worthwhile without SIMD support.\n");
5985 return false;
5986 }
5987
5988 if (!vec_stmt) /* transformation not required. */
5989 {
5990 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5991 DUMP_VECT_SCOPE ("vectorizable_operation");
5992 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5993 return true;
5994 }
5995
5996 /* Transform. */
5997
5998 if (dump_enabled_p ())
5999 dump_printf_loc (MSG_NOTE, vect_location,
6000 "transform binary/unary operation.\n");
6001
6002 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6003 vectors with unsigned elements, but the result is signed. So, we
6004 need to compute the MINUS_EXPR into a vectype temporary and
6005 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6006 tree vec_cvt_dest = NULL_TREE;
6007 if (orig_code == POINTER_DIFF_EXPR)
6008 {
6009 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6010 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6011 }
6012 /* Handle def. */
6013 else
6014 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6015
6016 /* In case the vectorization factor (VF) is bigger than the number
6017 of elements that we can fit in a vectype (nunits), we have to generate
6018 more than one vector stmt, i.e. we need to "unroll" the
6019 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6020 from one copy of the vector stmt to the next, in the field
6021 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6022 stages to find the correct vector defs to be used when vectorizing
6023 stmts that use the defs of the current stmt. The example below
6024 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6025 we need to create 4 vectorized stmts):
6026
6027 before vectorization:
6028 RELATED_STMT VEC_STMT
6029 S1: x = memref - -
6030 S2: z = x + 1 - -
6031
6032 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6033 there):
6034 RELATED_STMT VEC_STMT
6035 VS1_0: vx0 = memref0 VS1_1 -
6036 VS1_1: vx1 = memref1 VS1_2 -
6037 VS1_2: vx2 = memref2 VS1_3 -
6038 VS1_3: vx3 = memref3 - -
6039 S1: x = load - VS1_0
6040 S2: z = x + 1 - -
6041
6042 step2: vectorize stmt S2 (done here):
6043 To vectorize stmt S2 we first need to find the relevant vector
6044 def for the first operand 'x'. This is, as usual, obtained from
6045 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6046 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6047 relevant vector def 'vx0'. Having found 'vx0' we can generate
6048 the vector stmt VS2_0, and as usual, record it in the
6049 STMT_VINFO_VEC_STMT of stmt S2.
6050 When creating the second copy (VS2_1), we obtain the relevant vector
6051 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6052 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6053 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6054 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6055 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6056 chain of stmts and pointers:
6057 RELATED_STMT VEC_STMT
6058 VS1_0: vx0 = memref0 VS1_1 -
6059 VS1_1: vx1 = memref1 VS1_2 -
6060 VS1_2: vx2 = memref2 VS1_3 -
6061 VS1_3: vx3 = memref3 - -
6062 S1: x = load - VS1_0
6063 VS2_0: vz0 = vx0 + v1 VS2_1 -
6064 VS2_1: vz1 = vx1 + v1 VS2_2 -
6065 VS2_2: vz2 = vx2 + v1 VS2_3 -
6066 VS2_3: vz3 = vx3 + v1 - -
6067 S2: z = x + 1 - VS2_0 */
6068
6069 prev_stmt_info = NULL;
6070 for (j = 0; j < ncopies; j++)
6071 {
6072 /* Handle uses. */
6073 if (j == 0)
6074 {
6075 if (op_type == binary_op)
6076 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6077 slp_node);
6078 else if (op_type == ternary_op)
6079 {
6080 if (slp_node)
6081 {
6082 auto_vec<tree> ops(3);
6083 ops.quick_push (op0);
6084 ops.quick_push (op1);
6085 ops.quick_push (op2);
6086 auto_vec<vec<tree> > vec_defs(3);
6087 vect_get_slp_defs (ops, slp_node, &vec_defs);
6088 vec_oprnds0 = vec_defs[0];
6089 vec_oprnds1 = vec_defs[1];
6090 vec_oprnds2 = vec_defs[2];
6091 }
6092 else
6093 {
6094 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6095 &vec_oprnds1, NULL);
6096 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6097 NULL, NULL);
6098 }
6099 }
6100 else
6101 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6102 slp_node);
6103 }
6104 else
6105 {
6106 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6107 if (op_type == ternary_op)
6108 {
6109 tree vec_oprnd = vec_oprnds2.pop ();
6110 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6111 vec_oprnd));
6112 }
6113 }
6114
6115 /* Arguments are ready. Create the new vector stmt. */
6116 stmt_vec_info new_stmt_info = NULL;
6117 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6118 {
6119 vop1 = ((op_type == binary_op || op_type == ternary_op)
6120 ? vec_oprnds1[i] : NULL_TREE);
6121 vop2 = ((op_type == ternary_op)
6122 ? vec_oprnds2[i] : NULL_TREE);
6123 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6124 vop0, vop1, vop2);
6125 new_temp = make_ssa_name (vec_dest, new_stmt);
6126 gimple_assign_set_lhs (new_stmt, new_temp);
6127 new_stmt_info
6128 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6129 if (vec_cvt_dest)
6130 {
6131 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6132 gassign *new_stmt
6133 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6134 new_temp);
6135 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6136 gimple_assign_set_lhs (new_stmt, new_temp);
6137 new_stmt_info
6138 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6139 }
6140 if (slp_node)
6141 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6142 }
6143
6144 if (slp_node)
6145 continue;
6146
6147 if (j == 0)
6148 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6149 else
6150 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6151 prev_stmt_info = new_stmt_info;
6152 }
6153
6154 vec_oprnds0.release ();
6155 vec_oprnds1.release ();
6156 vec_oprnds2.release ();
6157
6158 return true;
6159 }
6160
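/* A minimal illustrative sketch, not used by the vectorizer itself: scalar
   operations of the kind handled above.  The pointer difference shows why
   POINTER_DIFF_EXPR needs the extra VIEW_CONVERT_EXPR step -- the operands
   are vectorized as unsigned elements while the result is signed.  Names
   are hypothetical.  */
static void
example_vectorizable_operations (int *c, const int *a, const int *b,
				 long *d, int *const *p, int *const *q, int n)
{
  for (int i = 0; i < n; i++)
    c[i] = a[i] + b[i];		/* plain binary op, looked up via optab_default.  */
  for (int i = 0; i < n; i++)
    d[i] = p[i] - q[i];		/* pointer difference: POINTER_DIFF_EXPR.  */
}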
6161 /* A helper function to ensure data reference DR_INFO's base alignment. */
6162
6163 static void
6164 ensure_base_align (dr_vec_info *dr_info)
6165 {
6166 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6167 return;
6168
6169 if (dr_info->base_misaligned)
6170 {
6171 tree base_decl = dr_info->base_decl;
6172
6173 /* We should only be able to increase the alignment of a base object
6174 if we know what its new alignment should be at compile time. */
6175 unsigned HOST_WIDE_INT align_base_to =
6176 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6177
6178 if (decl_in_symtab_p (base_decl))
6179 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6180 else
6181 {
6182 SET_DECL_ALIGN (base_decl, align_base_to);
6183 DECL_USER_ALIGN (base_decl) = 1;
6184 }
6185 dr_info->base_misaligned = false;
6186 }
6187 }
6188
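/* A minimal illustrative sketch, not used by the vectorizer itself: raising
   the base alignment as done above has much the same effect as declaring
   the base object with a larger alignment up front, e.g. (assuming a
   hypothetical 32-byte target alignment):  */
static void
example_base_alignment (float *out, int n)
{
  static float a[256] __attribute__ ((aligned (32)));	/* as-if declaration.  */

  for (int i = 0; i < n && i < 256; i++)
    out[i] = a[i] * 2.0f;
}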
6189
6190 /* Function get_group_alias_ptr_type.
6191
6192 Return the alias type for the group starting at FIRST_STMT_INFO. */
6193
6194 static tree
6195 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6196 {
6197 struct data_reference *first_dr, *next_dr;
6198
6199 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6200 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6201 while (next_stmt_info)
6202 {
6203 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6204 if (get_alias_set (DR_REF (first_dr))
6205 != get_alias_set (DR_REF (next_dr)))
6206 {
6207 if (dump_enabled_p ())
6208 dump_printf_loc (MSG_NOTE, vect_location,
6209 "conflicting alias set types.\n");
6210 return ptr_type_node;
6211 }
6212 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6213 }
6214 return reference_alias_ptr_type (DR_REF (first_dr));
6215 }
6216
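/* A minimal illustrative sketch, not used by the vectorizer itself: a store
   group whose members reference types with different alias sets, for which
   the function above falls back to ptr_type_node.  The struct and names
   are hypothetical.  */
struct example_pair { int count; float value; };

static void
example_conflicting_alias_sets (struct example_pair *d, int n)
{
  for (int j = 0; j < n; j++)
    {
      d[j].count = j;		/* int store ...  */
      d[j].value = 1.0f;	/* ... and float store: different alias sets.  */
    }
}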
6217
6218 /* Function vectorizable_store.
6219
6220 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
6221 that can be vectorized.
6222 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6223 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6224 Return true if STMT_INFO is vectorizable in this way. */
6225
6226 static bool
6227 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6228 stmt_vec_info *vec_stmt, slp_tree slp_node,
6229 stmt_vector_for_cost *cost_vec)
6230 {
6231 tree data_ref;
6232 tree op;
6233 tree vec_oprnd = NULL_TREE;
6234 tree elem_type;
6235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6236 struct loop *loop = NULL;
6237 machine_mode vec_mode;
6238 tree dummy;
6239 enum dr_alignment_support alignment_support_scheme;
6240 enum vect_def_type rhs_dt = vect_unknown_def_type;
6241 enum vect_def_type mask_dt = vect_unknown_def_type;
6242 stmt_vec_info prev_stmt_info = NULL;
6243 tree dataref_ptr = NULL_TREE;
6244 tree dataref_offset = NULL_TREE;
6245 gimple *ptr_incr = NULL;
6246 int ncopies;
6247 int j;
6248 stmt_vec_info first_stmt_info;
6249 bool grouped_store;
6250 unsigned int group_size, i;
6251 vec<tree> oprnds = vNULL;
6252 vec<tree> result_chain = vNULL;
6253 tree offset = NULL_TREE;
6254 vec<tree> vec_oprnds = vNULL;
6255 bool slp = (slp_node != NULL);
6256 unsigned int vec_num;
6257 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6258 vec_info *vinfo = stmt_info->vinfo;
6259 tree aggr_type;
6260 gather_scatter_info gs_info;
6261 poly_uint64 vf;
6262 vec_load_store_type vls_type;
6263 tree ref_type;
6264
6265 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6266 return false;
6267
6268 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6269 && ! vec_stmt)
6270 return false;
6271
6272 /* Is vectorizable store? */
6273
6274 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6275 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6276 {
6277 tree scalar_dest = gimple_assign_lhs (assign);
6278 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6279 && is_pattern_stmt_p (stmt_info))
6280 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6281 if (TREE_CODE (scalar_dest) != ARRAY_REF
6282 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6283 && TREE_CODE (scalar_dest) != INDIRECT_REF
6284 && TREE_CODE (scalar_dest) != COMPONENT_REF
6285 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6286 && TREE_CODE (scalar_dest) != REALPART_EXPR
6287 && TREE_CODE (scalar_dest) != MEM_REF)
6288 return false;
6289 }
6290 else
6291 {
6292 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6293 if (!call || !gimple_call_internal_p (call))
6294 return false;
6295
6296 internal_fn ifn = gimple_call_internal_fn (call);
6297 if (!internal_store_fn_p (ifn))
6298 return false;
6299
6300 if (slp_node != NULL)
6301 {
6302 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6304 "SLP of masked stores not supported.\n");
6305 return false;
6306 }
6307
6308 int mask_index = internal_fn_mask_index (ifn);
6309 if (mask_index >= 0)
6310 {
6311 mask = gimple_call_arg (call, mask_index);
6312 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6313 &mask_vectype))
6314 return false;
6315 }
6316 }
6317
6318 op = vect_get_store_rhs (stmt_info);
6319
6320 /* Cannot have hybrid store SLP -- that would mean storing to the
6321 same location twice. */
6322 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6323
6324 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6325 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6326
6327 if (loop_vinfo)
6328 {
6329 loop = LOOP_VINFO_LOOP (loop_vinfo);
6330 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6331 }
6332 else
6333 vf = 1;
6334
6335 /* Multiple types in SLP are handled by creating the appropriate number of
6336 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6337 case of SLP. */
6338 if (slp)
6339 ncopies = 1;
6340 else
6341 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6342
6343 gcc_assert (ncopies >= 1);
6344
6345 /* FORNOW. This restriction should be relaxed. */
6346 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6347 {
6348 if (dump_enabled_p ())
6349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6350 "multiple types in nested loop.\n");
6351 return false;
6352 }
6353
6354 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6355 return false;
6356
6357 elem_type = TREE_TYPE (vectype);
6358 vec_mode = TYPE_MODE (vectype);
6359
6360 if (!STMT_VINFO_DATA_REF (stmt_info))
6361 return false;
6362
6363 vect_memory_access_type memory_access_type;
6364 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6365 &memory_access_type, &gs_info))
6366 return false;
6367
6368 if (mask)
6369 {
6370 if (memory_access_type == VMAT_CONTIGUOUS)
6371 {
6372 if (!VECTOR_MODE_P (vec_mode)
6373 || !can_vec_mask_load_store_p (vec_mode,
6374 TYPE_MODE (mask_vectype), false))
6375 return false;
6376 }
6377 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6378 && (memory_access_type != VMAT_GATHER_SCATTER
6379 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
6380 {
6381 if (dump_enabled_p ())
6382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6383 "unsupported access type for masked store.\n");
6384 return false;
6385 }
6386 }
6387 else
6388 {
6389 /* FORNOW. In some cases we can vectorize even if the data type is not
6390 supported (e.g. array initialization with 0). */
6391 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6392 return false;
6393 }
6394
6395 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
6396 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6397 && memory_access_type != VMAT_GATHER_SCATTER
6398 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6399 if (grouped_store)
6400 {
6401 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6402 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6403 group_size = DR_GROUP_SIZE (first_stmt_info);
6404 }
6405 else
6406 {
6407 first_stmt_info = stmt_info;
6408 first_dr_info = dr_info;
6409 group_size = vec_num = 1;
6410 }
6411
6412 if (!vec_stmt) /* transformation not required. */
6413 {
6414 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6415
6416 if (loop_vinfo
6417 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6418 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6419 memory_access_type, &gs_info);
6420
6421 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6422 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6423 vls_type, slp_node, cost_vec);
6424 return true;
6425 }
6426 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6427
6428 /* Transform. */
6429
6430 ensure_base_align (dr_info);
6431
6432 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6433 {
6434 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6435 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6436 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6437 tree ptr, var, scale, vec_mask;
6438 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
6439 tree mask_halfvectype = mask_vectype;
6440 edge pe = loop_preheader_edge (loop);
6441 gimple_seq seq;
6442 basic_block new_bb;
6443 enum { NARROW, NONE, WIDEN } modifier;
6444 poly_uint64 scatter_off_nunits
6445 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6446
6447 if (known_eq (nunits, scatter_off_nunits))
6448 modifier = NONE;
6449 else if (known_eq (nunits * 2, scatter_off_nunits))
6450 {
6451 modifier = WIDEN;
6452
6453 /* Currently gathers and scatters are only supported for
6454 fixed-length vectors. */
6455 unsigned int count = scatter_off_nunits.to_constant ();
6456 vec_perm_builder sel (count, count, 1);
6457 for (i = 0; i < (unsigned int) count; ++i)
6458 sel.quick_push (i | (count / 2));
6459
6460 vec_perm_indices indices (sel, 1, count);
6461 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6462 indices);
6463 gcc_assert (perm_mask != NULL_TREE);
6464 }
6465 else if (known_eq (nunits, scatter_off_nunits * 2))
6466 {
6467 modifier = NARROW;
6468
6469 /* Currently gathers and scatters are only supported for
6470 fixed-length vectors. */
6471 unsigned int count = nunits.to_constant ();
6472 vec_perm_builder sel (count, count, 1);
6473 for (i = 0; i < (unsigned int) count; ++i)
6474 sel.quick_push (i | (count / 2));
6475
6476 vec_perm_indices indices (sel, 2, count);
6477 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6478 gcc_assert (perm_mask != NULL_TREE);
6479 ncopies *= 2;
6480
6481 if (mask)
6482 mask_halfvectype
6483 = build_same_sized_truth_vector_type (gs_info.offset_vectype);
6484 }
6485 else
6486 gcc_unreachable ();
6487
6488 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6489 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6490 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6491 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6492 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6493 scaletype = TREE_VALUE (arglist);
6494
6495 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6496 && TREE_CODE (rettype) == VOID_TYPE);
6497
6498 ptr = fold_convert (ptrtype, gs_info.base);
6499 if (!is_gimple_min_invariant (ptr))
6500 {
6501 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6502 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6503 gcc_assert (!new_bb);
6504 }
6505
6506 if (mask == NULL_TREE)
6507 {
6508 mask_arg = build_int_cst (masktype, -1);
6509 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
6510 }
6511
6512 scale = build_int_cst (scaletype, gs_info.scale);
6513
6514 prev_stmt_info = NULL;
6515 for (j = 0; j < ncopies; ++j)
6516 {
6517 if (j == 0)
6518 {
6519 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
6520 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
6521 stmt_info);
6522 if (mask)
6523 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
6524 stmt_info);
6525 }
6526 else if (modifier != NONE && (j & 1))
6527 {
6528 if (modifier == WIDEN)
6529 {
6530 src
6531 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6532 vec_oprnd1);
6533 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6534 stmt_info, gsi);
6535 if (mask)
6536 mask_op
6537 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6538 vec_mask);
6539 }
6540 else if (modifier == NARROW)
6541 {
6542 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6543 stmt_info, gsi);
6544 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6545 vec_oprnd0);
6546 }
6547 else
6548 gcc_unreachable ();
6549 }
6550 else
6551 {
6552 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
6553 vec_oprnd1);
6554 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
6555 vec_oprnd0);
6556 if (mask)
6557 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
6558 vec_mask);
6559 }
6560
6561 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6562 {
6563 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6564 TYPE_VECTOR_SUBPARTS (srctype)));
6565 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6566 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6567 gassign *new_stmt
6568 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6569 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6570 src = var;
6571 }
6572
6573 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6574 {
6575 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6576 TYPE_VECTOR_SUBPARTS (idxtype)));
6577 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6578 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6579 gassign *new_stmt
6580 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6581 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6582 op = var;
6583 }
6584
6585 if (mask)
6586 {
6587 tree utype;
6588 mask_arg = mask_op;
6589 if (modifier == NARROW)
6590 {
6591 var = vect_get_new_ssa_name (mask_halfvectype,
6592 vect_simple_var);
6593 gassign *new_stmt
6594 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
6595 : VEC_UNPACK_LO_EXPR,
6596 mask_op);
6597 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6598 mask_arg = var;
6599 }
6600 tree optype = TREE_TYPE (mask_arg);
6601 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
6602 utype = masktype;
6603 else
6604 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
6605 var = vect_get_new_ssa_name (utype, vect_scalar_var);
6606 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
6607 gassign *new_stmt
6608 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
6609 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6610 mask_arg = var;
6611 if (!useless_type_conversion_p (masktype, utype))
6612 {
6613 gcc_assert (TYPE_PRECISION (utype)
6614 <= TYPE_PRECISION (masktype));
6615 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
6616 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
6617 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6618 mask_arg = var;
6619 }
6620 }
6621
6622 gcall *new_stmt
6623 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
6624 stmt_vec_info new_stmt_info
6625 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6626
6627 if (prev_stmt_info == NULL)
6628 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6629 else
6630 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6631 prev_stmt_info = new_stmt_info;
6632 }
6633 return true;
6634 }
6635
6636 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6637 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6638
6639 if (grouped_store)
6640 {
6641 /* FORNOW */
6642 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6643
6644 /* We vectorize all the stmts of the interleaving group when we
6645 reach the last stmt in the group. */
6646 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6647 < DR_GROUP_SIZE (first_stmt_info)
6648 && !slp)
6649 {
6650 *vec_stmt = NULL;
6651 return true;
6652 }
6653
6654 if (slp)
6655 {
6656 grouped_store = false;
6657 /* VEC_NUM is the number of vect stmts to be created for this
6658 group. */
6659 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6660 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6661 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6662 == first_stmt_info);
6663 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
6664 op = vect_get_store_rhs (first_stmt_info);
6665 }
6666 else
6667 /* VEC_NUM is the number of vect stmts to be created for this
6668 group. */
6669 vec_num = group_size;
6670
6671 ref_type = get_group_alias_ptr_type (first_stmt_info);
6672 }
6673 else
6674 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
6675
6676 if (dump_enabled_p ())
6677 dump_printf_loc (MSG_NOTE, vect_location,
6678 "transform store. ncopies = %d\n", ncopies);
6679
6680 if (memory_access_type == VMAT_ELEMENTWISE
6681 || memory_access_type == VMAT_STRIDED_SLP)
6682 {
6683 gimple_stmt_iterator incr_gsi;
6684 bool insert_after;
6685 gimple *incr;
6686 tree offvar;
6687 tree ivstep;
6688 tree running_off;
6689 tree stride_base, stride_step, alias_off;
6690 tree vec_oprnd;
6691 unsigned int g;
6692 /* Checked by get_load_store_type. */
6693 unsigned int const_nunits = nunits.to_constant ();
6694
6695 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6696 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6697
6698 stride_base
6699 = fold_build_pointer_plus
6700 (DR_BASE_ADDRESS (first_dr_info->dr),
6701 size_binop (PLUS_EXPR,
6702 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
6703 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
6704 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
6705
6706 /* For a store with a loop-invariant (but non-power-of-2) stride
6707 (i.e. not a grouped access) like so:
6708
6709 for (i = 0; i < n; i += stride)
6710 array[i] = ...;
6711
6712 we generate a new induction variable and new stores from
6713 the components of the (vectorized) rhs:
6714
6715 for (j = 0; ; j += VF*stride)
6716 vectemp = ...;
6717 tmp1 = vectemp[0];
6718 array[j] = tmp1;
6719 tmp2 = vectemp[1];
6720 array[j + stride] = tmp2;
6721 ...
6722 */
6723
6724 unsigned nstores = const_nunits;
6725 unsigned lnel = 1;
6726 tree ltype = elem_type;
6727 tree lvectype = vectype;
6728 if (slp)
6729 {
6730 if (group_size < const_nunits
6731 && const_nunits % group_size == 0)
6732 {
6733 nstores = const_nunits / group_size;
6734 lnel = group_size;
6735 ltype = build_vector_type (elem_type, group_size);
6736 lvectype = vectype;
6737
6738 /* First check if vec_extract optab doesn't support extraction
6739 of vector elts directly. */
6740 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6741 machine_mode vmode;
6742 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6743 || !VECTOR_MODE_P (vmode)
6744 || !targetm.vector_mode_supported_p (vmode)
6745 || (convert_optab_handler (vec_extract_optab,
6746 TYPE_MODE (vectype), vmode)
6747 == CODE_FOR_nothing))
6748 {
6749 /* Try to avoid emitting an extract of vector elements
6750 by performing the extracts using an integer type of the
6751 same size, extracting from a vector of those and then
6752 re-interpreting it as the original vector type if
6753 supported. */
6754 unsigned lsize
6755 = group_size * GET_MODE_BITSIZE (elmode);
6756 unsigned int lnunits = const_nunits / group_size;
6757 /* If we can't construct such a vector fall back to
6758 element extracts from the original vector type and
6759 element size stores. */
6760 if (int_mode_for_size (lsize, 0).exists (&elmode)
6761 && mode_for_vector (elmode, lnunits).exists (&vmode)
6762 && VECTOR_MODE_P (vmode)
6763 && targetm.vector_mode_supported_p (vmode)
6764 && (convert_optab_handler (vec_extract_optab,
6765 vmode, elmode)
6766 != CODE_FOR_nothing))
6767 {
6768 nstores = lnunits;
6769 lnel = group_size;
6770 ltype = build_nonstandard_integer_type (lsize, 1);
6771 lvectype = build_vector_type (ltype, nstores);
6772 }
6773 /* Else fall back to vector extraction anyway.
6774 Fewer stores are more important than avoiding spilling
6775 of the vector we extract from. Compared to the
6776 construction case in vectorizable_load no store-forwarding
6777 issue exists here for reasonable archs. */
6778 }
6779 }
6780 else if (group_size >= const_nunits
6781 && group_size % const_nunits == 0)
6782 {
6783 nstores = 1;
6784 lnel = const_nunits;
6785 ltype = vectype;
6786 lvectype = vectype;
6787 }
6788 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6789 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6790 }
6791
6792 ivstep = stride_step;
6793 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6794 build_int_cst (TREE_TYPE (ivstep), vf));
6795
6796 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6797
6798 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6799 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6800 create_iv (stride_base, ivstep, NULL,
6801 loop, &incr_gsi, insert_after,
6802 &offvar, NULL);
6803 incr = gsi_stmt (incr_gsi);
6804 loop_vinfo->add_stmt (incr);
6805
6806 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6807
6808 prev_stmt_info = NULL;
6809 alias_off = build_int_cst (ref_type, 0);
6810 stmt_vec_info next_stmt_info = first_stmt_info;
6811 for (g = 0; g < group_size; g++)
6812 {
6813 running_off = offvar;
6814 if (g)
6815 {
6816 tree size = TYPE_SIZE_UNIT (ltype);
6817 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6818 size);
6819 tree newoff = copy_ssa_name (running_off, NULL);
6820 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6821 running_off, pos);
6822 vect_finish_stmt_generation (stmt_info, incr, gsi);
6823 running_off = newoff;
6824 }
6825 unsigned int group_el = 0;
6826 unsigned HOST_WIDE_INT
6827 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6828 for (j = 0; j < ncopies; j++)
6829 {
6830 /* We've set op and dt above, from vect_get_store_rhs,
6831 and first_stmt_info == stmt_info. */
6832 if (j == 0)
6833 {
6834 if (slp)
6835 {
6836 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6837 &vec_oprnds, NULL, slp_node);
6838 vec_oprnd = vec_oprnds[0];
6839 }
6840 else
6841 {
6842 op = vect_get_store_rhs (next_stmt_info);
6843 vec_oprnd = vect_get_vec_def_for_operand
6844 (op, next_stmt_info);
6845 }
6846 }
6847 else
6848 {
6849 if (slp)
6850 vec_oprnd = vec_oprnds[j];
6851 else
6852 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6853 vec_oprnd);
6854 }
6855 /* Pun the vector to extract from if necessary. */
6856 if (lvectype != vectype)
6857 {
6858 tree tem = make_ssa_name (lvectype);
6859 gimple *pun
6860 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6861 lvectype, vec_oprnd));
6862 vect_finish_stmt_generation (stmt_info, pun, gsi);
6863 vec_oprnd = tem;
6864 }
6865 for (i = 0; i < nstores; i++)
6866 {
6867 tree newref, newoff;
6868 gimple *incr, *assign;
6869 tree size = TYPE_SIZE (ltype);
6870 /* Extract the i'th component. */
6871 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6872 bitsize_int (i), size);
6873 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6874 size, pos);
6875
6876 elem = force_gimple_operand_gsi (gsi, elem, true,
6877 NULL_TREE, true,
6878 GSI_SAME_STMT);
6879
6880 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6881 group_el * elsz);
6882 newref = build2 (MEM_REF, ltype,
6883 running_off, this_off);
6884 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
6885
6886 /* And store it to *running_off. */
6887 assign = gimple_build_assign (newref, elem);
6888 stmt_vec_info assign_info
6889 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6890
6891 group_el += lnel;
6892 if (! slp
6893 || group_el == group_size)
6894 {
6895 newoff = copy_ssa_name (running_off, NULL);
6896 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6897 running_off, stride_step);
6898 vect_finish_stmt_generation (stmt_info, incr, gsi);
6899
6900 running_off = newoff;
6901 group_el = 0;
6902 }
6903 if (g == group_size - 1
6904 && !slp)
6905 {
6906 if (j == 0 && i == 0)
6907 STMT_VINFO_VEC_STMT (stmt_info)
6908 = *vec_stmt = assign_info;
6909 else
6910 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6911 prev_stmt_info = assign_info;
6912 }
6913 }
6914 }
6915 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6916 if (slp)
6917 break;
6918 }
6919
6920 vec_oprnds.release ();
6921 return true;
6922 }
6923
6924 auto_vec<tree> dr_chain (group_size);
6925 oprnds.create (group_size);
6926
6927 alignment_support_scheme
6928 = vect_supportable_dr_alignment (first_dr_info, false);
6929 gcc_assert (alignment_support_scheme);
6930 vec_loop_masks *loop_masks
6931 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6932 ? &LOOP_VINFO_MASKS (loop_vinfo)
6933 : NULL);
6934 /* Targets with store-lane instructions must not require explicit
6935 realignment. vect_supportable_dr_alignment always returns either
6936 dr_aligned or dr_unaligned_supported for masked operations. */
6937 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6938 && !mask
6939 && !loop_masks)
6940 || alignment_support_scheme == dr_aligned
6941 || alignment_support_scheme == dr_unaligned_supported);
6942
6943 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6944 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6945 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6946
6947 tree bump;
6948 tree vec_offset = NULL_TREE;
6949 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6950 {
6951 aggr_type = NULL_TREE;
6952 bump = NULL_TREE;
6953 }
6954 else if (memory_access_type == VMAT_GATHER_SCATTER)
6955 {
6956 aggr_type = elem_type;
6957 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
6958 &bump, &vec_offset);
6959 }
6960 else
6961 {
6962 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6963 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6964 else
6965 aggr_type = vectype;
6966 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
6967 memory_access_type);
6968 }
6969
6970 if (mask)
6971 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6972
6973 /* In case the vectorization factor (VF) is bigger than the number
6974 of elements that we can fit in a vectype (nunits), we have to generate
6975 more than one vector stmt - i.e., we need to "unroll" the
6976 vector stmt by a factor VF/nunits. For more details see documentation in
6977 vect_get_vec_def_for_copy_stmt. */
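   /* A small worked example of that arithmetic (numbers chosen purely for
      illustration): with VF = 8 and a V4SI vectype (nunits = 4), each scalar
      store needs VF/nunits = 8/4 = 2 vector stmts, so ncopies = 2 and the
      j-loop below runs twice, chaining the copies via
      STMT_VINFO_RELATED_STMT in the non-SLP case.  */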
6978
6979 /* In case of interleaving (non-unit grouped access):
6980
6981 S1: &base + 2 = x2
6982 S2: &base = x0
6983 S3: &base + 1 = x1
6984 S4: &base + 3 = x3
6985
6986 We create vectorized stores starting from the base address (the access of
6987 the first stmt in the chain - S2 in the above example) when the last store
6988 stmt of the chain (S4) is reached:
6989
6990 VS1: &base = vx2
6991 VS2: &base + vec_size*1 = vx0
6992 VS3: &base + vec_size*2 = vx1
6993 VS4: &base + vec_size*3 = vx3
6994
6995 Then permutation statements are generated:
6996
6997 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6998 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6999 ...
7000
7001 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7002 (the order of the data-refs in the output of vect_permute_store_chain
7003 corresponds to the order of scalar stmts in the interleaving chain - see
7004 the documentation of vect_permute_store_chain()).
7005
7006 In case of both multiple types and interleaving, above vector stores and
7007 permutation stmts are created for every copy. The result vector stmts are
7008 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7009 STMT_VINFO_RELATED_STMT for the next copies.
7010 */
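   /* Purely as an illustrative sketch (the type, function and bounds below
      are hypothetical, not taken from this file), a scalar loop that forms
      such an interleaving chain of group_size 4 could look like:

          struct rgba { float r, g, b, a; };

          void
          fill (struct rgba *out, float v, int n)
          {
            for (int i = 0; i < n; i++)
              {
                out[i].r = v;        // &base
                out[i].g = v + 1;    // &base + 1
                out[i].b = v + 2;    // &base + 2
                out[i].a = v + 3;    // &base + 3
              }
          }

      The j-loop below then emits, per copy, the interleaving VEC_PERM_EXPRs
      (via vect_permute_store_chain) and the vector stores described
      above.  */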
7011
7012 prev_stmt_info = NULL;
7013 tree vec_mask = NULL_TREE;
7014 for (j = 0; j < ncopies; j++)
7015 {
7016 stmt_vec_info new_stmt_info;
7017 if (j == 0)
7018 {
7019 if (slp)
7020 {
7021 /* Get vectorized arguments for SLP_NODE. */
7022 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
7023 NULL, slp_node);
7024
7025 vec_oprnd = vec_oprnds[0];
7026 }
7027 else
7028 {
7029 /* For interleaved stores we collect vectorized defs for all the
7030 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7031 used as an input to vect_permute_store_chain(), and OPRNDS as
7032 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
7033
7034 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7035 OPRNDS are of size 1. */
7036 stmt_vec_info next_stmt_info = first_stmt_info;
7037 for (i = 0; i < group_size; i++)
7038 {
7039 /* Since gaps are not supported for interleaved stores,
7040 DR_GROUP_SIZE is the exact number of stmts in the chain.
7041 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
7042 there is no interleaving, DR_GROUP_SIZE is 1,
7043 and only one iteration of the loop will be executed. */
7044 op = vect_get_store_rhs (next_stmt_info);
7045 vec_oprnd = vect_get_vec_def_for_operand
7046 (op, next_stmt_info);
7047 dr_chain.quick_push (vec_oprnd);
7048 oprnds.quick_push (vec_oprnd);
7049 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7050 }
7051 if (mask)
7052 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
7053 mask_vectype);
7054 }
7055
7056 /* We should have caught mismatched types earlier. */
7057 gcc_assert (useless_type_conversion_p (vectype,
7058 TREE_TYPE (vec_oprnd)));
7059 bool simd_lane_access_p
7060 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7061 if (simd_lane_access_p
7062 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
7063 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
7064 && integer_zerop (DR_OFFSET (first_dr_info->dr))
7065 && integer_zerop (DR_INIT (first_dr_info->dr))
7066 && alias_sets_conflict_p (get_alias_set (aggr_type),
7067 get_alias_set (TREE_TYPE (ref_type))))
7068 {
7069 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
7070 dataref_offset = build_int_cst (ref_type, 0);
7071 }
7072 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7073 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7074 &dataref_ptr, &vec_offset);
7075 else
7076 dataref_ptr
7077 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7078 simd_lane_access_p ? loop : NULL,
7079 offset, &dummy, gsi, &ptr_incr,
7080 simd_lane_access_p, NULL_TREE, bump);
7081 }
7082 else
7083 {
7084 /* For interleaved stores we created vectorized defs for all the
7085 defs stored in OPRNDS in the previous iteration (previous copy).
7086 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7087 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7088 next copy.
7089 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7090 OPRNDS are of size 1. */
7091 for (i = 0; i < group_size; i++)
7092 {
7093 op = oprnds[i];
7094 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7095 dr_chain[i] = vec_oprnd;
7096 oprnds[i] = vec_oprnd;
7097 }
7098 if (mask)
7099 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7100 if (dataref_offset)
7101 dataref_offset
7102 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7103 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7104 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7105 else
7106 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7107 stmt_info, bump);
7108 }
7109
7110 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7111 {
7112 tree vec_array;
7113
7114 /* Get an array into which we can store the individual vectors. */
7115 vec_array = create_vector_array (vectype, vec_num);
7116
7117 /* Invalidate the current contents of VEC_ARRAY. This should
7118 become an RTL clobber too, which prevents the vector registers
7119 from being upward-exposed. */
7120 vect_clobber_variable (stmt_info, gsi, vec_array);
7121
7122 /* Store the individual vectors into the array. */
7123 for (i = 0; i < vec_num; i++)
7124 {
7125 vec_oprnd = dr_chain[i];
7126 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7127 }
7128
7129 tree final_mask = NULL;
7130 if (loop_masks)
7131 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7132 vectype, j);
7133 if (vec_mask)
7134 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7135 vec_mask, gsi);
7136
7137 gcall *call;
7138 if (final_mask)
7139 {
7140 /* Emit:
7141 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7142 VEC_ARRAY). */
7143 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7144 tree alias_ptr = build_int_cst (ref_type, align);
7145 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7146 dataref_ptr, alias_ptr,
7147 final_mask, vec_array);
7148 }
7149 else
7150 {
7151 /* Emit:
7152 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7153 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7154 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7155 vec_array);
7156 gimple_call_set_lhs (call, data_ref);
7157 }
7158 gimple_call_set_nothrow (call, true);
7159 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7160
7161 /* Record that VEC_ARRAY is now dead. */
7162 vect_clobber_variable (stmt_info, gsi, vec_array);
7163 }
7164 else
7165 {
7166 new_stmt_info = NULL;
7167 if (grouped_store)
7168 {
7169 if (j == 0)
7170 result_chain.create (group_size);
7171 /* Permute. */
7172 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7173 &result_chain);
7174 }
7175
7176 stmt_vec_info next_stmt_info = first_stmt_info;
7177 for (i = 0; i < vec_num; i++)
7178 {
7179 unsigned misalign;
7180 unsigned HOST_WIDE_INT align;
7181
7182 tree final_mask = NULL_TREE;
7183 if (loop_masks)
7184 final_mask = vect_get_loop_mask (gsi, loop_masks,
7185 vec_num * ncopies,
7186 vectype, vec_num * j + i);
7187 if (vec_mask)
7188 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7189 vec_mask, gsi);
7190
7191 if (memory_access_type == VMAT_GATHER_SCATTER)
7192 {
7193 tree scale = size_int (gs_info.scale);
7194 gcall *call;
7195 if (loop_masks)
7196 call = gimple_build_call_internal
7197 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7198 scale, vec_oprnd, final_mask);
7199 else
7200 call = gimple_build_call_internal
7201 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7202 scale, vec_oprnd);
7203 gimple_call_set_nothrow (call, true);
7204 new_stmt_info
7205 = vect_finish_stmt_generation (stmt_info, call, gsi);
7206 break;
7207 }
7208
7209 if (i > 0)
7210 /* Bump the vector pointer. */
7211 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7212 stmt_info, bump);
7213
7214 if (slp)
7215 vec_oprnd = vec_oprnds[i];
7216 else if (grouped_store)
7217 /* For grouped stores vectorized defs are interleaved in
7218 vect_permute_store_chain(). */
7219 vec_oprnd = result_chain[i];
7220
7221 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
7222 if (aligned_access_p (first_dr_info))
7223 misalign = 0;
7224 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7225 {
7226 align = dr_alignment (vect_dr_behavior (first_dr_info));
7227 misalign = 0;
7228 }
7229 else
7230 misalign = DR_MISALIGNMENT (first_dr_info);
7231 if (dataref_offset == NULL_TREE
7232 && TREE_CODE (dataref_ptr) == SSA_NAME)
7233 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7234 misalign);
7235
7236 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7237 {
7238 tree perm_mask = perm_mask_for_reverse (vectype);
7239 tree perm_dest = vect_create_destination_var
7240 (vect_get_store_rhs (stmt_info), vectype);
7241 tree new_temp = make_ssa_name (perm_dest);
7242
7243 /* Generate the permute statement. */
7244 gimple *perm_stmt
7245 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7246 vec_oprnd, perm_mask);
7247 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7248
7249 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7250 vec_oprnd = new_temp;
7251 }
7252
7253 /* Arguments are ready. Create the new vector stmt. */
7254 if (final_mask)
7255 {
7256 align = least_bit_hwi (misalign | align);
7257 tree ptr = build_int_cst (ref_type, align);
7258 gcall *call
7259 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7260 dataref_ptr, ptr,
7261 final_mask, vec_oprnd);
7262 gimple_call_set_nothrow (call, true);
7263 new_stmt_info
7264 = vect_finish_stmt_generation (stmt_info, call, gsi);
7265 }
7266 else
7267 {
7268 data_ref = fold_build2 (MEM_REF, vectype,
7269 dataref_ptr,
7270 dataref_offset
7271 ? dataref_offset
7272 : build_int_cst (ref_type, 0));
7273 if (aligned_access_p (first_dr_info))
7274 ;
7275 else if (DR_MISALIGNMENT (first_dr_info) == -1)
7276 TREE_TYPE (data_ref)
7277 = build_aligned_type (TREE_TYPE (data_ref),
7278 align * BITS_PER_UNIT);
7279 else
7280 TREE_TYPE (data_ref)
7281 = build_aligned_type (TREE_TYPE (data_ref),
7282 TYPE_ALIGN (elem_type));
7283 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7284 gassign *new_stmt
7285 = gimple_build_assign (data_ref, vec_oprnd);
7286 new_stmt_info
7287 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7288 }
7289
7290 if (slp)
7291 continue;
7292
7293 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7294 if (!next_stmt_info)
7295 break;
7296 }
7297 }
7298 if (!slp)
7299 {
7300 if (j == 0)
7301 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7302 else
7303 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7304 prev_stmt_info = new_stmt_info;
7305 }
7306 }
7307
7308 oprnds.release ();
7309 result_chain.release ();
7310 vec_oprnds.release ();
7311
7312 return true;
7313 }
7314
7315 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7316 VECTOR_CST mask. No checks are made that the target platform supports the
7317 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7318 vect_gen_perm_mask_checked. */
7319
7320 tree
7321 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7322 {
7323 tree mask_type;
7324
7325 poly_uint64 nunits = sel.length ();
7326 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7327
7328 mask_type = build_vector_type (ssizetype, nunits);
7329 return vec_perm_indices_to_tree (mask_type, sel);
7330 }
7331
7332 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7333 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7334
7335 tree
7336 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7337 {
7338 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7339 return vect_gen_perm_mask_any (vectype, sel);
7340 }
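/* A rough usage sketch (the values and variable names here are assumptions
   for illustration only): to reverse a V4SI vector one could build the index
   vector { 3, 2, 1, 0 } and turn it into a mask, e.g.

       vec_perm_builder sel (4, 4, 1);
       for (int k = 0; k < 4; ++k)
         sel.quick_push (3 - k);
       vec_perm_indices indices (sel, 1, 4);
       tree mask = vect_gen_perm_mask_checked (vectype, indices);

   VEC_PERM_EXPR <v, v, mask> then yields the elements of v in reverse order;
   perm_mask_for_reverse elsewhere in this file builds essentially this mask
   (using a stepped encoding).  */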
7341
7342 /* Given vector variables X and Y that were generated for the scalar
7343 STMT_INFO, generate instructions to permute the vector elements of X and Y
7344 using permutation mask MASK_VEC, insert them at *GSI, and return the
7345 permuted vector variable. */
7346
7347 static tree
7348 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7349 gimple_stmt_iterator *gsi)
7350 {
7351 tree vectype = TREE_TYPE (x);
7352 tree perm_dest, data_ref;
7353 gimple *perm_stmt;
7354
7355 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7356 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
7357 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7358 else
7359 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7360 data_ref = make_ssa_name (perm_dest);
7361
7362 /* Generate the permute statement. */
7363 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7364 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7365
7366 return data_ref;
7367 }
7368
7369 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7370 inserting them on the loop's preheader edge. Returns true if we
7371 were successful in doing so (and thus STMT_INFO can then be moved),
7372 otherwise returns false. */
7373
7374 static bool
7375 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7376 {
7377 ssa_op_iter i;
7378 tree op;
7379 bool any = false;
7380
7381 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7382 {
7383 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7384 if (!gimple_nop_p (def_stmt)
7385 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7386 {
7387 /* Make sure we don't need to recurse. While we could do
7388 so in simple cases, when there are more complex use webs
7389 we don't have an easy way to preserve stmt order to fulfil
7390 dependencies within them. */
7391 tree op2;
7392 ssa_op_iter i2;
7393 if (gimple_code (def_stmt) == GIMPLE_PHI)
7394 return false;
7395 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7396 {
7397 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7398 if (!gimple_nop_p (def_stmt2)
7399 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7400 return false;
7401 }
7402 any = true;
7403 }
7404 }
7405
7406 if (!any)
7407 return true;
7408
7409 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7410 {
7411 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7412 if (!gimple_nop_p (def_stmt)
7413 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7414 {
7415 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7416 gsi_remove (&gsi, false);
7417 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7418 }
7419 }
7420
7421 return true;
7422 }
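/* For illustration only (the loop and names below are hypothetical): given

       for (i = 0; i < n; i++)
         a[i] = *p;

   where the load *p is loop invariant, hoist_defs_of_uses would move an
   in-loop statement computing p (say p_2 = &s_1->field) to the preheader,
   so that the VMAT_INVARIANT handling in vectorizable_load below can then
   emit the invariant load itself on the preheader edge and broadcast it
   into a vector.  */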
7423
7424 /* vectorizable_load.
7425
7426 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
7427 that can be vectorized.
7428 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7429 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7430 Return true if STMT_INFO is vectorizable in this way. */
7431
7432 static bool
7433 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7434 stmt_vec_info *vec_stmt, slp_tree slp_node,
7435 slp_instance slp_node_instance,
7436 stmt_vector_for_cost *cost_vec)
7437 {
7438 tree scalar_dest;
7439 tree vec_dest = NULL;
7440 tree data_ref = NULL;
7441 stmt_vec_info prev_stmt_info;
7442 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7443 struct loop *loop = NULL;
7444 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7445 bool nested_in_vect_loop = false;
7446 tree elem_type;
7447 tree new_temp;
7448 machine_mode mode;
7449 tree dummy;
7450 enum dr_alignment_support alignment_support_scheme;
7451 tree dataref_ptr = NULL_TREE;
7452 tree dataref_offset = NULL_TREE;
7453 gimple *ptr_incr = NULL;
7454 int ncopies;
7455 int i, j;
7456 unsigned int group_size;
7457 poly_uint64 group_gap_adj;
7458 tree msq = NULL_TREE, lsq;
7459 tree offset = NULL_TREE;
7460 tree byte_offset = NULL_TREE;
7461 tree realignment_token = NULL_TREE;
7462 gphi *phi = NULL;
7463 vec<tree> dr_chain = vNULL;
7464 bool grouped_load = false;
7465 stmt_vec_info first_stmt_info;
7466 stmt_vec_info first_stmt_info_for_drptr = NULL;
7467 bool compute_in_loop = false;
7468 struct loop *at_loop;
7469 int vec_num;
7470 bool slp = (slp_node != NULL);
7471 bool slp_perm = false;
7472 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7473 poly_uint64 vf;
7474 tree aggr_type;
7475 gather_scatter_info gs_info;
7476 vec_info *vinfo = stmt_info->vinfo;
7477 tree ref_type;
7478 enum vect_def_type mask_dt = vect_unknown_def_type;
7479
7480 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7481 return false;
7482
7483 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7484 && ! vec_stmt)
7485 return false;
7486
7487 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7488 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7489 {
7490 scalar_dest = gimple_assign_lhs (assign);
7491 if (TREE_CODE (scalar_dest) != SSA_NAME)
7492 return false;
7493
7494 tree_code code = gimple_assign_rhs_code (assign);
7495 if (code != ARRAY_REF
7496 && code != BIT_FIELD_REF
7497 && code != INDIRECT_REF
7498 && code != COMPONENT_REF
7499 && code != IMAGPART_EXPR
7500 && code != REALPART_EXPR
7501 && code != MEM_REF
7502 && TREE_CODE_CLASS (code) != tcc_declaration)
7503 return false;
7504 }
7505 else
7506 {
7507 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7508 if (!call || !gimple_call_internal_p (call))
7509 return false;
7510
7511 internal_fn ifn = gimple_call_internal_fn (call);
7512 if (!internal_load_fn_p (ifn))
7513 return false;
7514
7515 scalar_dest = gimple_call_lhs (call);
7516 if (!scalar_dest)
7517 return false;
7518
7519 if (slp_node != NULL)
7520 {
7521 if (dump_enabled_p ())
7522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7523 "SLP of masked loads not supported.\n");
7524 return false;
7525 }
7526
7527 int mask_index = internal_fn_mask_index (ifn);
7528 if (mask_index >= 0)
7529 {
7530 mask = gimple_call_arg (call, mask_index);
7531 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7532 &mask_vectype))
7533 return false;
7534 }
7535 }
7536
7537 if (!STMT_VINFO_DATA_REF (stmt_info))
7538 return false;
7539
7540 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7541 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7542
7543 if (loop_vinfo)
7544 {
7545 loop = LOOP_VINFO_LOOP (loop_vinfo);
7546 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7547 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7548 }
7549 else
7550 vf = 1;
7551
7552 /* Multiple types in SLP are handled by creating the appropriate number of
7553 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7554 case of SLP. */
7555 if (slp)
7556 ncopies = 1;
7557 else
7558 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7559
7560 gcc_assert (ncopies >= 1);
7561
7562 /* FORNOW. This restriction should be relaxed. */
7563 if (nested_in_vect_loop && ncopies > 1)
7564 {
7565 if (dump_enabled_p ())
7566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7567 "multiple types in nested loop.\n");
7568 return false;
7569 }
7570
7571 /* Invalidate assumptions made by dependence analysis when vectorization
7572 on the unrolled body effectively re-orders stmts. */
7573 if (ncopies > 1
7574 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7575 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7576 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7577 {
7578 if (dump_enabled_p ())
7579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7580 "cannot perform implicit CSE when unrolling "
7581 "with negative dependence distance\n");
7582 return false;
7583 }
7584
7585 elem_type = TREE_TYPE (vectype);
7586 mode = TYPE_MODE (vectype);
7587
7588 /* FORNOW. In some cases we can vectorize even if the data-type is not
7589 supported (e.g. data copies). */
7590 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7591 {
7592 if (dump_enabled_p ())
7593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7594 "Aligned load, but unsupported type.\n");
7595 return false;
7596 }
7597
7598 /* Check if the load is a part of an interleaving chain. */
7599 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7600 {
7601 grouped_load = true;
7602 /* FORNOW */
7603 gcc_assert (!nested_in_vect_loop);
7604 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7605
7606 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7607 group_size = DR_GROUP_SIZE (first_stmt_info);
7608
7609 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7610 slp_perm = true;
7611
7612 /* Invalidate assumptions made by dependence analysis when vectorization
7613 on the unrolled body effectively re-orders stmts. */
7614 if (!PURE_SLP_STMT (stmt_info)
7615 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7616 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7617 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7618 {
7619 if (dump_enabled_p ())
7620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7621 "cannot perform implicit CSE when performing "
7622 "group loads with negative dependence distance\n");
7623 return false;
7624 }
7625
7626 /* Similarly, when the stmt is a load that is both part of an SLP
7627 instance and a loop vectorized stmt via the same-dr mechanism,
7628 we have to give up. */
7629 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7630 && (STMT_SLP_TYPE (stmt_info)
7631 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7632 {
7633 if (dump_enabled_p ())
7634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7635 "conflicting SLP types for CSEd load\n");
7636 return false;
7637 }
7638 }
7639 else
7640 group_size = 1;
7641
7642 vect_memory_access_type memory_access_type;
7643 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7644 &memory_access_type, &gs_info))
7645 return false;
7646
7647 if (mask)
7648 {
7649 if (memory_access_type == VMAT_CONTIGUOUS)
7650 {
7651 machine_mode vec_mode = TYPE_MODE (vectype);
7652 if (!VECTOR_MODE_P (vec_mode)
7653 || !can_vec_mask_load_store_p (vec_mode,
7654 TYPE_MODE (mask_vectype), true))
7655 return false;
7656 }
7657 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7658 && memory_access_type != VMAT_GATHER_SCATTER)
7659 {
7660 if (dump_enabled_p ())
7661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7662 "unsupported access type for masked load.\n");
7663 return false;
7664 }
7665 }
7666
7667 if (!vec_stmt) /* transformation not required. */
7668 {
7669 if (!slp)
7670 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7671
7672 if (loop_vinfo
7673 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7674 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7675 memory_access_type, &gs_info);
7676
7677 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7678 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7679 slp_node_instance, slp_node, cost_vec);
7680 return true;
7681 }
7682
7683 if (!slp)
7684 gcc_assert (memory_access_type
7685 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7686
7687 if (dump_enabled_p ())
7688 dump_printf_loc (MSG_NOTE, vect_location,
7689 "transform load. ncopies = %d\n", ncopies);
7690
7691 /* Transform. */
7692
7693 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7694 ensure_base_align (dr_info);
7695
7696 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7697 {
7698 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7699 return true;
7700 }
7701
7702 if (memory_access_type == VMAT_INVARIANT)
7703 {
7704 gcc_assert (!grouped_load && !mask && !bb_vinfo);
7705 /* If we have versioned for aliasing or the loop doesn't
7706 have any data dependencies that would preclude this,
7707 then we are sure this is a loop invariant load and
7708 thus we can insert it on the preheader edge. */
7709 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7710 && !nested_in_vect_loop
7711 && hoist_defs_of_uses (stmt_info, loop));
7712 if (hoist_p)
7713 {
7714 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
7715 if (dump_enabled_p ())
7716 dump_printf_loc (MSG_NOTE, vect_location,
7717 "hoisting out of the vectorized loop: %G", stmt);
7718 scalar_dest = copy_ssa_name (scalar_dest);
7719 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
7720 gsi_insert_on_edge_immediate
7721 (loop_preheader_edge (loop),
7722 gimple_build_assign (scalar_dest, rhs));
7723 }
7724 /* These copies are all equivalent, but currently the representation
7725 requires a separate STMT_VINFO_VEC_STMT for each one. */
7726 prev_stmt_info = NULL;
7727 gimple_stmt_iterator gsi2 = *gsi;
7728 gsi_next (&gsi2);
7729 for (j = 0; j < ncopies; j++)
7730 {
7731 stmt_vec_info new_stmt_info;
7732 if (hoist_p)
7733 {
7734 new_temp = vect_init_vector (stmt_info, scalar_dest,
7735 vectype, NULL);
7736 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
7737 new_stmt_info = vinfo->add_stmt (new_stmt);
7738 }
7739 else
7740 {
7741 new_temp = vect_init_vector (stmt_info, scalar_dest,
7742 vectype, &gsi2);
7743 new_stmt_info = vinfo->lookup_def (new_temp);
7744 }
7745 if (slp)
7746 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7747 else if (j == 0)
7748 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7749 else
7750 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7751 prev_stmt_info = new_stmt_info;
7752 }
7753 return true;
7754 }
7755
7756 if (memory_access_type == VMAT_ELEMENTWISE
7757 || memory_access_type == VMAT_STRIDED_SLP)
7758 {
7759 gimple_stmt_iterator incr_gsi;
7760 bool insert_after;
7761 gimple *incr;
7762 tree offvar;
7763 tree ivstep;
7764 tree running_off;
7765 vec<constructor_elt, va_gc> *v = NULL;
7766 tree stride_base, stride_step, alias_off;
7767 /* Checked by get_load_store_type. */
7768 unsigned int const_nunits = nunits.to_constant ();
7769 unsigned HOST_WIDE_INT cst_offset = 0;
7770
7771 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7772 gcc_assert (!nested_in_vect_loop);
7773
7774 if (grouped_load)
7775 {
7776 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7777 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7778 }
7779 else
7780 {
7781 first_stmt_info = stmt_info;
7782 first_dr_info = dr_info;
7783 }
7784 if (slp && grouped_load)
7785 {
7786 group_size = DR_GROUP_SIZE (first_stmt_info);
7787 ref_type = get_group_alias_ptr_type (first_stmt_info);
7788 }
7789 else
7790 {
7791 if (grouped_load)
7792 cst_offset
7793 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7794 * vect_get_place_in_interleaving_chain (stmt_info,
7795 first_stmt_info));
7796 group_size = 1;
7797 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7798 }
7799
7800 stride_base
7801 = fold_build_pointer_plus
7802 (DR_BASE_ADDRESS (first_dr_info->dr),
7803 size_binop (PLUS_EXPR,
7804 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7805 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7806 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7807
7808 /* For a load with loop-invariant (but other than power-of-2)
7809 stride (i.e. not a grouped access) like so:
7810
7811 for (i = 0; i < n; i += stride)
7812 ... = array[i];
7813
7814 we generate a new induction variable and new accesses to
7815 form a new vector (or vectors, depending on ncopies):
7816
7817 for (j = 0; ; j += VF*stride)
7818 tmp1 = array[j];
7819 tmp2 = array[j + stride];
7820 ...
7821 vectemp = {tmp1, tmp2, ...}
7822 */
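   /* A concrete (invented) instance of the scheme above: with stride = 3,
      VF = 4, int elements and a V4SI vectype, the IV created below advances
      by vf * DR_STEP = 4 * 12 bytes per vector iteration, and each copy
      builds

          vectemp = { array[j], array[j+3], array[j+6], array[j+9] }

      from four scalar loads followed by a CONSTRUCTOR (or from loads of a
      wider integer type, see the VMAT_STRIDED_SLP handling that follows).  */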
7823
7824 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7825 build_int_cst (TREE_TYPE (stride_step), vf));
7826
7827 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7828
7829 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7830 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7831 create_iv (stride_base, ivstep, NULL,
7832 loop, &incr_gsi, insert_after,
7833 &offvar, NULL);
7834 incr = gsi_stmt (incr_gsi);
7835 loop_vinfo->add_stmt (incr);
7836
7837 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7838
7839 prev_stmt_info = NULL;
7840 running_off = offvar;
7841 alias_off = build_int_cst (ref_type, 0);
7842 int nloads = const_nunits;
7843 int lnel = 1;
7844 tree ltype = TREE_TYPE (vectype);
7845 tree lvectype = vectype;
7846 auto_vec<tree> dr_chain;
7847 if (memory_access_type == VMAT_STRIDED_SLP)
7848 {
7849 if (group_size < const_nunits)
7850 {
7851 /* First check if vec_init optab supports construction from
7852 vector elts directly. */
7853 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7854 machine_mode vmode;
7855 if (mode_for_vector (elmode, group_size).exists (&vmode)
7856 && VECTOR_MODE_P (vmode)
7857 && targetm.vector_mode_supported_p (vmode)
7858 && (convert_optab_handler (vec_init_optab,
7859 TYPE_MODE (vectype), vmode)
7860 != CODE_FOR_nothing))
7861 {
7862 nloads = const_nunits / group_size;
7863 lnel = group_size;
7864 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7865 }
7866 else
7867 {
7868 /* Otherwise avoid emitting a constructor of vector elements
7869 by performing the loads using an integer type of the same
7870 size, constructing a vector of those and then
7871 re-interpreting it as the original vector type.
7872 This avoids a huge runtime penalty due to the general
7873 inability to perform store forwarding from smaller stores
7874 to a larger load. */
7875 unsigned lsize
7876 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7877 unsigned int lnunits = const_nunits / group_size;
7878 /* If we can't construct such a vector, fall back to
7879 element loads of the original vector type. */
7880 if (int_mode_for_size (lsize, 0).exists (&elmode)
7881 && mode_for_vector (elmode, lnunits).exists (&vmode)
7882 && VECTOR_MODE_P (vmode)
7883 && targetm.vector_mode_supported_p (vmode)
7884 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7885 != CODE_FOR_nothing))
7886 {
7887 nloads = lnunits;
7888 lnel = group_size;
7889 ltype = build_nonstandard_integer_type (lsize, 1);
7890 lvectype = build_vector_type (ltype, nloads);
7891 }
7892 }
7893 }
7894 else
7895 {
7896 nloads = 1;
7897 lnel = const_nunits;
7898 ltype = vectype;
7899 }
7900 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7901 }
7902 /* Load vector(1) scalar_type if the vectype has just one element. */
7903 else if (nloads == 1)
7904 ltype = vectype;
7905
7906 if (slp)
7907 {
7908 /* For SLP permutation support we need to load the whole group,
7909 not only the number of vector stmts the permutation result
7910 fits in. */
7911 if (slp_perm)
7912 {
7913 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7914 variable VF. */
7915 unsigned int const_vf = vf.to_constant ();
7916 ncopies = CEIL (group_size * const_vf, const_nunits);
7917 dr_chain.create (ncopies);
7918 }
7919 else
7920 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7921 }
7922 unsigned int group_el = 0;
7923 unsigned HOST_WIDE_INT
7924 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7925 for (j = 0; j < ncopies; j++)
7926 {
7927 if (nloads > 1)
7928 vec_alloc (v, nloads);
7929 stmt_vec_info new_stmt_info = NULL;
7930 for (i = 0; i < nloads; i++)
7931 {
7932 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7933 group_el * elsz + cst_offset);
7934 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7935 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
7936 gassign *new_stmt
7937 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7938 new_stmt_info
7939 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7940 if (nloads > 1)
7941 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7942 gimple_assign_lhs (new_stmt));
7943
7944 group_el += lnel;
7945 if (! slp
7946 || group_el == group_size)
7947 {
7948 tree newoff = copy_ssa_name (running_off);
7949 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7950 running_off, stride_step);
7951 vect_finish_stmt_generation (stmt_info, incr, gsi);
7952
7953 running_off = newoff;
7954 group_el = 0;
7955 }
7956 }
7957 if (nloads > 1)
7958 {
7959 tree vec_inv = build_constructor (lvectype, v);
7960 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
7961 new_stmt_info = vinfo->lookup_def (new_temp);
7962 if (lvectype != vectype)
7963 {
7964 gassign *new_stmt
7965 = gimple_build_assign (make_ssa_name (vectype),
7966 VIEW_CONVERT_EXPR,
7967 build1 (VIEW_CONVERT_EXPR,
7968 vectype, new_temp));
7969 new_stmt_info
7970 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7971 }
7972 }
7973
7974 if (slp)
7975 {
7976 if (slp_perm)
7977 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7978 else
7979 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7980 }
7981 else
7982 {
7983 if (j == 0)
7984 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7985 else
7986 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7987 prev_stmt_info = new_stmt_info;
7988 }
7989 }
7990 if (slp_perm)
7991 {
7992 unsigned n_perms;
7993 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7994 slp_node_instance, false, &n_perms);
7995 }
7996 return true;
7997 }
7998
7999 if (memory_access_type == VMAT_GATHER_SCATTER
8000 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8001 grouped_load = false;
8002
8003 if (grouped_load)
8004 {
8005 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8006 group_size = DR_GROUP_SIZE (first_stmt_info);
8007 /* For SLP vectorization we directly vectorize a subchain
8008 without permutation. */
8009 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8010 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8011 /* For BB vectorization always use the first stmt to base
8012 the data ref pointer on. */
8013 if (bb_vinfo)
8014 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8015
8016 /* Check if the chain of loads is already vectorized. */
8017 if (STMT_VINFO_VEC_STMT (first_stmt_info)
8018 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8019 ??? But we can only do so if there is exactly one
8020 as we have no way to get at the rest. Leave the CSE
8021 opportunity alone.
8022 ??? With the group load eventually participating
8023 in multiple different permutations (having multiple
8024 slp nodes which refer to the same group) the CSE
8025 is even wrong code. See PR56270. */
8026 && !slp)
8027 {
8028 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8029 return true;
8030 }
8031 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8032 group_gap_adj = 0;
8033
8034 /* VEC_NUM is the number of vect stmts to be created for this group. */
8035 if (slp)
8036 {
8037 grouped_load = false;
8038 /* If an SLP permutation is from N elements to N elements,
8039 and if one vector holds a whole number of such N-element groups, we can load
8040 the inputs to the permutation in the same way as an
8041 unpermuted sequence. In other cases we need to load the
8042 whole group, not only the number of vector stmts the
8043 permutation result fits in. */
8044 if (slp_perm
8045 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
8046 || !multiple_p (nunits, group_size)))
8047 {
8048 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8049 variable VF; see vect_transform_slp_perm_load. */
8050 unsigned int const_vf = vf.to_constant ();
8051 unsigned int const_nunits = nunits.to_constant ();
8052 vec_num = CEIL (group_size * const_vf, const_nunits);
8053 group_gap_adj = vf * group_size - nunits * vec_num;
8054 }
8055 else
8056 {
8057 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8058 group_gap_adj
8059 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
8060 }
8061 }
8062 else
8063 vec_num = group_size;
8064
8065 ref_type = get_group_alias_ptr_type (first_stmt_info);
8066 }
8067 else
8068 {
8069 first_stmt_info = stmt_info;
8070 first_dr_info = dr_info;
8071 group_size = vec_num = 1;
8072 group_gap_adj = 0;
8073 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8074 }
8075
8076 alignment_support_scheme
8077 = vect_supportable_dr_alignment (first_dr_info, false);
8078 gcc_assert (alignment_support_scheme);
8079 vec_loop_masks *loop_masks
8080 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8081 ? &LOOP_VINFO_MASKS (loop_vinfo)
8082 : NULL);
8083 /* Targets with store-lane instructions must not require explicit
8084 realignment. vect_supportable_dr_alignment always returns either
8085 dr_aligned or dr_unaligned_supported for masked operations. */
8086 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8087 && !mask
8088 && !loop_masks)
8089 || alignment_support_scheme == dr_aligned
8090 || alignment_support_scheme == dr_unaligned_supported);
8091
8092 /* In case the vectorization factor (VF) is bigger than the number
8093 of elements that we can fit in a vectype (nunits), we have to generate
8094 more than one vector stmt - i.e., we need to "unroll" the
8095 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8096 from one copy of the vector stmt to the next, in the field
8097 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8098 stages to find the correct vector defs to be used when vectorizing
8099 stmts that use the defs of the current stmt. The example below
8100 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8101 need to create 4 vectorized stmts):
8102
8103 before vectorization:
8104 RELATED_STMT VEC_STMT
8105 S1: x = memref - -
8106 S2: z = x + 1 - -
8107
8108 step 1: vectorize stmt S1:
8109 We first create the vector stmt VS1_0, and, as usual, record a
8110 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8111 Next, we create the vector stmt VS1_1, and record a pointer to
8112 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8113 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8114 stmts and pointers:
8115 RELATED_STMT VEC_STMT
8116 VS1_0: vx0 = memref0 VS1_1 -
8117 VS1_1: vx1 = memref1 VS1_2 -
8118 VS1_2: vx2 = memref2 VS1_3 -
8119 VS1_3: vx3 = memref3 - -
8120 S1: x = load - VS1_0
8121 S2: z = x + 1 - -
8122
8123 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8124 information we recorded in the RELATED_STMT field is used to vectorize
8125 stmt S2. */
8126
8127 /* In case of interleaving (non-unit grouped access):
8128
8129 S1: x2 = &base + 2
8130 S2: x0 = &base
8131 S3: x1 = &base + 1
8132 S4: x3 = &base + 3
8133
8134 Vectorized loads are created in the order of memory accesses
8135 starting from the access of the first stmt of the chain:
8136
8137 VS1: vx0 = &base
8138 VS2: vx1 = &base + vec_size*1
8139 VS3: vx3 = &base + vec_size*2
8140 VS4: vx4 = &base + vec_size*3
8141
8142 Then permutation statements are generated:
8143
8144 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8145 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8146 ...
8147
8148 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8149 (the order of the data-refs in the output of vect_permute_load_chain
8150 corresponds to the order of scalar stmts in the interleaving chain - see
8151 the documentation of vect_permute_load_chain()).
8152 The generation of permutation stmts and recording them in
8153 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8154
8155 In case of both multiple types and interleaving, the vector loads and
8156 permutation stmts above are created for every copy. The result vector
8157 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8158 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
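   /* Purely as an illustrative scalar source example (the type and function
      below are hypothetical, not taken from this file):

          struct cplx { double re, im; };

          void
          split (struct cplx *x, double *re, double *im, int n)
          {
            for (int i = 0; i < n; i++)
              {
                re[i] = x[i].re;   // x0 = &base
                im[i] = x[i].im;   // x1 = &base + 1
              }
          }

      The two loads form an interleaving chain of group_size 2; wide vector
      loads are emitted in memory order (VS1..VS4 above) and the even/odd
      elements are then separated with VEC_PERM_EXPRs as in VS5/VS6.  */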
8159
8160 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8161 on a target that supports unaligned accesses (dr_unaligned_supported)
8162 we generate the following code:
8163 p = initial_addr;
8164 indx = 0;
8165 loop {
8166 p = p + indx * vectype_size;
8167 vec_dest = *(p);
8168 indx = indx + 1;
8169 }
8170
8171 Otherwise, the data reference is potentially unaligned on a target that
8172 does not support unaligned accesses (dr_explicit_realign_optimized) -
8173 then generate the following code, in which the data in each iteration is
8174 obtained by two vector loads, one from the previous iteration, and one
8175 from the current iteration:
8176 p1 = initial_addr;
8177 msq_init = *(floor(p1))
8178 p2 = initial_addr + VS - 1;
8179 realignment_token = call target_builtin;
8180 indx = 0;
8181 loop {
8182 p2 = p2 + indx * vectype_size
8183 lsq = *(floor(p2))
8184 vec_dest = realign_load (msq, lsq, realignment_token)
8185 indx = indx + 1;
8186 msq = lsq;
8187 } */
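   /* A small worked example of the realignment scheme (element values are
      invented): for a V4SI load from an address p that is misaligned by one
      element,

          msq = *(floor(p))              = { a0, a1, a2, a3 }
          lsq = *(next aligned address)  = { a4, a5, a6, a7 }

      and realign_load (msq, lsq, realignment_token) produces
      { a1, a2, a3, a4 }, i.e. the value the unaligned load *p would have
      returned, with the token from the target builtin encoding the required
      shift.  */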
8188
8189 /* If the misalignment remains the same throughout the execution of the
8190 loop, we can create the init_addr and permutation mask at the loop
8191 preheader. Otherwise, they need to be created inside the loop.
8192 This can only occur when vectorizing memory accesses in the inner-loop
8193 nested within an outer-loop that is being vectorized. */
8194
8195 if (nested_in_vect_loop
8196 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
8197 GET_MODE_SIZE (TYPE_MODE (vectype))))
8198 {
8199 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8200 compute_in_loop = true;
8201 }
8202
8203 if ((alignment_support_scheme == dr_explicit_realign_optimized
8204 || alignment_support_scheme == dr_explicit_realign)
8205 && !compute_in_loop)
8206 {
8207 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8208 alignment_support_scheme, NULL_TREE,
8209 &at_loop);
8210 if (alignment_support_scheme == dr_explicit_realign_optimized)
8211 {
8212 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8213 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8214 size_one_node);
8215 }
8216 }
8217 else
8218 at_loop = loop;
8219
8220 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8221 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8222
8223 tree bump;
8224 tree vec_offset = NULL_TREE;
8225 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8226 {
8227 aggr_type = NULL_TREE;
8228 bump = NULL_TREE;
8229 }
8230 else if (memory_access_type == VMAT_GATHER_SCATTER)
8231 {
8232 aggr_type = elem_type;
8233 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8234 &bump, &vec_offset);
8235 }
8236 else
8237 {
8238 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8239 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8240 else
8241 aggr_type = vectype;
8242 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8243 memory_access_type);
8244 }
8245
8246 tree vec_mask = NULL_TREE;
8247 prev_stmt_info = NULL;
8248 poly_uint64 group_elt = 0;
8249 for (j = 0; j < ncopies; j++)
8250 {
8251 stmt_vec_info new_stmt_info = NULL;
8252 /* 1. Create the vector or array pointer update chain. */
8253 if (j == 0)
8254 {
8255 bool simd_lane_access_p
8256 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8257 if (simd_lane_access_p
8258 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8259 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8260 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8261 && integer_zerop (DR_INIT (first_dr_info->dr))
8262 && alias_sets_conflict_p (get_alias_set (aggr_type),
8263 get_alias_set (TREE_TYPE (ref_type)))
8264 && (alignment_support_scheme == dr_aligned
8265 || alignment_support_scheme == dr_unaligned_supported))
8266 {
8267 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8268 dataref_offset = build_int_cst (ref_type, 0);
8269 }
8270 else if (first_stmt_info_for_drptr
8271 && first_stmt_info != first_stmt_info_for_drptr)
8272 {
8273 dataref_ptr
8274 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8275 aggr_type, at_loop, offset, &dummy,
8276 gsi, &ptr_incr, simd_lane_access_p,
8277 byte_offset, bump);
8278 /* Adjust the pointer by the difference to first_stmt. */
8279 data_reference_p ptrdr
8280 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8281 tree diff
8282 = fold_convert (sizetype,
8283 size_binop (MINUS_EXPR,
8284 DR_INIT (first_dr_info->dr),
8285 DR_INIT (ptrdr)));
8286 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8287 stmt_info, diff);
8288 }
8289 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8290 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8291 &dataref_ptr, &vec_offset);
8292 else
8293 dataref_ptr
8294 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8295 offset, &dummy, gsi, &ptr_incr,
8296 simd_lane_access_p,
8297 byte_offset, bump);
8298 if (mask)
8299 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8300 mask_vectype);
8301 }
8302 else
8303 {
8304 if (dataref_offset)
8305 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8306 bump);
8307 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8308 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8309 else
8310 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8311 stmt_info, bump);
8312 if (mask)
8313 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8314 }
8315
8316 if (grouped_load || slp_perm)
8317 dr_chain.create (vec_num);
8318
8319 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8320 {
8321 tree vec_array;
8322
8323 vec_array = create_vector_array (vectype, vec_num);
8324
8325 tree final_mask = NULL_TREE;
8326 if (loop_masks)
8327 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8328 vectype, j);
8329 if (vec_mask)
8330 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8331 vec_mask, gsi);
8332
8333 gcall *call;
8334 if (final_mask)
8335 {
8336 /* Emit:
8337 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8338 VEC_MASK). */
8339 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8340 tree alias_ptr = build_int_cst (ref_type, align);
8341 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8342 dataref_ptr, alias_ptr,
8343 final_mask);
8344 }
8345 else
8346 {
8347 /* Emit:
8348 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8349 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8350 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8351 }
8352 gimple_call_set_lhs (call, vec_array);
8353 gimple_call_set_nothrow (call, true);
8354 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8355
8356 /* Extract each vector into an SSA_NAME. */
8357 for (i = 0; i < vec_num; i++)
8358 {
8359 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8360 vec_array, i);
8361 dr_chain.quick_push (new_temp);
8362 }
8363
8364 /* Record the mapping between SSA_NAMEs and statements. */
8365 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8366
8367 /* Record that VEC_ARRAY is now dead. */
8368 vect_clobber_variable (stmt_info, gsi, vec_array);
8369 }
8370 else
8371 {
8372 for (i = 0; i < vec_num; i++)
8373 {
8374 tree final_mask = NULL_TREE;
8375 if (loop_masks
8376 && memory_access_type != VMAT_INVARIANT)
8377 final_mask = vect_get_loop_mask (gsi, loop_masks,
8378 vec_num * ncopies,
8379 vectype, vec_num * j + i);
8380 if (vec_mask)
8381 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8382 vec_mask, gsi);
8383
8384 if (i > 0)
8385 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8386 stmt_info, bump);
8387
8388 /* 2. Create the vector-load in the loop. */
8389 gimple *new_stmt = NULL;
8390 switch (alignment_support_scheme)
8391 {
8392 case dr_aligned:
8393 case dr_unaligned_supported:
8394 {
8395 unsigned int misalign;
8396 unsigned HOST_WIDE_INT align;
8397
8398 if (memory_access_type == VMAT_GATHER_SCATTER)
8399 {
8400 tree scale = size_int (gs_info.scale);
8401 gcall *call;
8402 if (loop_masks)
8403 call = gimple_build_call_internal
8404 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8405 vec_offset, scale, final_mask);
8406 else
8407 call = gimple_build_call_internal
8408 (IFN_GATHER_LOAD, 3, dataref_ptr,
8409 vec_offset, scale);
8410 gimple_call_set_nothrow (call, true);
8411 new_stmt = call;
8412 data_ref = NULL_TREE;
8413 break;
8414 }
8415
8416 align =
8417 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8418 if (alignment_support_scheme == dr_aligned)
8419 {
8420 gcc_assert (aligned_access_p (first_dr_info));
8421 misalign = 0;
8422 }
8423 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8424 {
8425 align = dr_alignment
8426 (vect_dr_behavior (first_dr_info));
8427 misalign = 0;
8428 }
8429 else
8430 misalign = DR_MISALIGNMENT (first_dr_info);
8431 if (dataref_offset == NULL_TREE
8432 && TREE_CODE (dataref_ptr) == SSA_NAME)
8433 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8434 align, misalign);
8435
8436 if (final_mask)
8437 {
8438 align = least_bit_hwi (misalign | align);
8439 tree ptr = build_int_cst (ref_type, align);
8440 gcall *call
8441 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8442 dataref_ptr, ptr,
8443 final_mask);
8444 gimple_call_set_nothrow (call, true);
8445 new_stmt = call;
8446 data_ref = NULL_TREE;
8447 }
8448 else
8449 {
8450 data_ref
8451 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8452 dataref_offset
8453 ? dataref_offset
8454 : build_int_cst (ref_type, 0));
8455 if (alignment_support_scheme == dr_aligned)
8456 ;
8457 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8458 TREE_TYPE (data_ref)
8459 = build_aligned_type (TREE_TYPE (data_ref),
8460 align * BITS_PER_UNIT);
8461 else
8462 TREE_TYPE (data_ref)
8463 = build_aligned_type (TREE_TYPE (data_ref),
8464 TYPE_ALIGN (elem_type));
8465 }
8466 break;
8467 }
8468 case dr_explicit_realign:
8469 {
8470 tree ptr, bump;
8471
8472 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8473
8474 if (compute_in_loop)
8475 msq = vect_setup_realignment (first_stmt_info, gsi,
8476 &realignment_token,
8477 dr_explicit_realign,
8478 dataref_ptr, NULL);
8479
8480 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8481 ptr = copy_ssa_name (dataref_ptr);
8482 else
8483 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8484 // For explicit realign the target alignment should be
8485 // known at compile time.
8486 unsigned HOST_WIDE_INT align =
8487 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8488 new_stmt = gimple_build_assign
8489 (ptr, BIT_AND_EXPR, dataref_ptr,
8490 build_int_cst
8491 (TREE_TYPE (dataref_ptr),
8492 -(HOST_WIDE_INT) align));
8493 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8494 data_ref
8495 = build2 (MEM_REF, vectype, ptr,
8496 build_int_cst (ref_type, 0));
8497 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8498 vec_dest = vect_create_destination_var (scalar_dest,
8499 vectype);
8500 new_stmt = gimple_build_assign (vec_dest, data_ref);
8501 new_temp = make_ssa_name (vec_dest, new_stmt);
8502 gimple_assign_set_lhs (new_stmt, new_temp);
8503 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8504 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8505 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8506 msq = new_temp;
8507
8508 bump = size_binop (MULT_EXPR, vs,
8509 TYPE_SIZE_UNIT (elem_type));
8510 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8511 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8512 stmt_info, bump);
8513 new_stmt = gimple_build_assign
8514 (NULL_TREE, BIT_AND_EXPR, ptr,
8515 build_int_cst
8516 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8517 ptr = copy_ssa_name (ptr, new_stmt);
8518 gimple_assign_set_lhs (new_stmt, ptr);
8519 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8520 data_ref
8521 = build2 (MEM_REF, vectype, ptr,
8522 build_int_cst (ref_type, 0));
8523 break;
8524 }
8525 case dr_explicit_realign_optimized:
8526 {
8527 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8528 new_temp = copy_ssa_name (dataref_ptr);
8529 else
8530 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8531 // We should only be doing this if we know the target
8532 // alignment at compile time.
8533 unsigned HOST_WIDE_INT align =
8534 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
8535 new_stmt = gimple_build_assign
8536 (new_temp, BIT_AND_EXPR, dataref_ptr,
8537 build_int_cst (TREE_TYPE (dataref_ptr),
8538 -(HOST_WIDE_INT) align));
8539 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8540 data_ref
8541 = build2 (MEM_REF, vectype, new_temp,
8542 build_int_cst (ref_type, 0));
8543 break;
8544 }
8545 default:
8546 gcc_unreachable ();
8547 }
8548 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8549 /* DATA_REF is null if we've already built the statement. */
8550 if (data_ref)
8551 {
8552 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8553 new_stmt = gimple_build_assign (vec_dest, data_ref);
8554 }
8555 new_temp = make_ssa_name (vec_dest, new_stmt);
8556 gimple_set_lhs (new_stmt, new_temp);
8557 new_stmt_info
8558 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8559
8560 /* 3. Handle explicit realignment if necessary/supported.
8561 Create in loop:
8562 vec_dest = realign_load (msq, lsq, realignment_token) */
8563 if (alignment_support_scheme == dr_explicit_realign_optimized
8564 || alignment_support_scheme == dr_explicit_realign)
8565 {
8566 lsq = gimple_assign_lhs (new_stmt);
8567 if (!realignment_token)
8568 realignment_token = dataref_ptr;
8569 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8570 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8571 msq, lsq, realignment_token);
8572 new_temp = make_ssa_name (vec_dest, new_stmt);
8573 gimple_assign_set_lhs (new_stmt, new_temp);
8574 new_stmt_info
8575 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8576
8577 if (alignment_support_scheme == dr_explicit_realign_optimized)
8578 {
8579 gcc_assert (phi);
8580 if (i == vec_num - 1 && j == ncopies - 1)
8581 add_phi_arg (phi, lsq,
8582 loop_latch_edge (containing_loop),
8583 UNKNOWN_LOCATION);
8584 msq = lsq;
8585 }
8586 }
8587
8588 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8589 {
8590 tree perm_mask = perm_mask_for_reverse (vectype);
8591 new_temp = permute_vec_elements (new_temp, new_temp,
8592 perm_mask, stmt_info, gsi);
8593 new_stmt_info = vinfo->lookup_def (new_temp);
8594 }
8595
8596 /* Collect vector loads and later create their permutation in
8597 vect_transform_grouped_load (). */
8598 if (grouped_load || slp_perm)
8599 dr_chain.quick_push (new_temp);
8600
8601 /* Store vector loads in the corresponding SLP_NODE. */
8602 if (slp && !slp_perm)
8603 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8604
8605 /* With an SLP permutation we load the gaps as well; without one
8606 we need to skip the gaps once we have managed to fully load all
8607 the elements. group_gap_adj is DR_GROUP_SIZE here. */
8608 group_elt += nunits;
8609 if (maybe_ne (group_gap_adj, 0U)
8610 && !slp_perm
8611 && known_eq (group_elt, group_size - group_gap_adj))
8612 {
8613 poly_wide_int bump_val
8614 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8615 * group_gap_adj);
8616 tree bump = wide_int_to_tree (sizetype, bump_val);
8617 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8618 stmt_info, bump);
8619 group_elt = 0;
8620 }
8621 }
8622 /* Bump the vector pointer to account for a gap or for excess
8623 elements loaded for a permuted SLP load. */
8624 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8625 {
8626 poly_wide_int bump_val
8627 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8628 * group_gap_adj);
8629 tree bump = wide_int_to_tree (sizetype, bump_val);
8630 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8631 stmt_info, bump);
8632 }
8633 }
8634
8635 if (slp && !slp_perm)
8636 continue;
8637
8638 if (slp_perm)
8639 {
8640 unsigned n_perms;
8641 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8642 slp_node_instance, false,
8643 &n_perms))
8644 {
8645 dr_chain.release ();
8646 return false;
8647 }
8648 }
8649 else
8650 {
8651 if (grouped_load)
8652 {
8653 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8654 vect_transform_grouped_load (stmt_info, dr_chain,
8655 group_size, gsi);
8656 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8657 }
8658 else
8659 {
8660 if (j == 0)
8661 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8662 else
8663 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8664 prev_stmt_info = new_stmt_info;
8665 }
8666 }
8667 dr_chain.release ();
8668 }
8669
8670 return true;
8671 }
8672
8673 /* Function vect_is_simple_cond.
8674
8675 Input:
8676 LOOP - the loop that is being vectorized.
8677 COND - Condition that is checked for simple use.
8678
8679 Output:
8680 *COMP_VECTYPE - the vector type for the comparison.
8681 *DTS - the def types for the arguments of the comparison.
8682
8683 Returns whether a COND can be vectorized. Checks whether
8684 the condition operands are supportable using vect_is_simple_use. */
8685
8686 static bool
8687 vect_is_simple_cond (tree cond, vec_info *vinfo,
8688 tree *comp_vectype, enum vect_def_type *dts,
8689 tree vectype)
8690 {
8691 tree lhs, rhs;
8692 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8693
8694 /* Mask case. */
8695 if (TREE_CODE (cond) == SSA_NAME
8696 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8697 {
8698 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8699 || !*comp_vectype
8700 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8701 return false;
8702 return true;
8703 }
8704
8705 if (!COMPARISON_CLASS_P (cond))
8706 return false;
8707
8708 lhs = TREE_OPERAND (cond, 0);
8709 rhs = TREE_OPERAND (cond, 1);
8710
8711 if (TREE_CODE (lhs) == SSA_NAME)
8712 {
8713 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8714 return false;
8715 }
8716 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8717 || TREE_CODE (lhs) == FIXED_CST)
8718 dts[0] = vect_constant_def;
8719 else
8720 return false;
8721
8722 if (TREE_CODE (rhs) == SSA_NAME)
8723 {
8724 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8725 return false;
8726 }
8727 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8728 || TREE_CODE (rhs) == FIXED_CST)
8729 dts[1] = vect_constant_def;
8730 else
8731 return false;
8732
8733 if (vectype1 && vectype2
8734 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8735 TYPE_VECTOR_SUBPARTS (vectype2)))
8736 return false;
8737
8738 *comp_vectype = vectype1 ? vectype1 : vectype2;
8739 /* Invariant comparison. */
8740 if (! *comp_vectype && vectype)
8741 {
8742 tree scalar_type = TREE_TYPE (lhs);
8743 /* If we can widen the comparison to match vectype do so. */
8744 if (INTEGRAL_TYPE_P (scalar_type)
8745 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8746 TYPE_SIZE (TREE_TYPE (vectype))))
8747 scalar_type = build_nonstandard_integer_type
8748 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8749 TYPE_UNSIGNED (scalar_type));
8750 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8751 }
8752
8753 return true;
8754 }
8755
8756 /* vectorizable_condition.
8757
8758 Check if STMT_INFO is a conditional modify expression that can be vectorized.
8759 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8760 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8761 at GSI.
8762
8763 When STMT_INFO is vectorized as a nested cycle, FOR_REDUCTION is true.
8764
8765 Return true if STMT_INFO is vectorizable in this way. */
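/* As a sketch (SSA names purely illustrative), a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes a vector statement of the form

     vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   with the comparison rewritten into bit operations for boolean
   operands as handled below. */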
8766
8767 bool
8768 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8769 stmt_vec_info *vec_stmt, bool for_reduction,
8770 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8771 {
8772 vec_info *vinfo = stmt_info->vinfo;
8773 tree scalar_dest = NULL_TREE;
8774 tree vec_dest = NULL_TREE;
8775 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8776 tree then_clause, else_clause;
8777 tree comp_vectype = NULL_TREE;
8778 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8779 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8780 tree vec_compare;
8781 tree new_temp;
8782 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8783 enum vect_def_type dts[4]
8784 = {vect_unknown_def_type, vect_unknown_def_type,
8785 vect_unknown_def_type, vect_unknown_def_type};
8786 int ndts = 4;
8787 int ncopies;
8788 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8789 stmt_vec_info prev_stmt_info = NULL;
8790 int i, j;
8791 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8792 vec<tree> vec_oprnds0 = vNULL;
8793 vec<tree> vec_oprnds1 = vNULL;
8794 vec<tree> vec_oprnds2 = vNULL;
8795 vec<tree> vec_oprnds3 = vNULL;
8796 tree vec_cmp_type;
8797 bool masked = false;
8798
8799 if (for_reduction && STMT_SLP_TYPE (stmt_info))
8800 return false;
8801
8802 vect_reduction_type reduction_type
8803 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8804 if (reduction_type == TREE_CODE_REDUCTION)
8805 {
8806 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8807 return false;
8808
8809 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8810 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8811 && for_reduction))
8812 return false;
8813
8814 /* FORNOW: not yet supported. */
8815 if (STMT_VINFO_LIVE_P (stmt_info))
8816 {
8817 if (dump_enabled_p ())
8818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8819 "value used after loop.\n");
8820 return false;
8821 }
8822 }
8823
8824 /* Is vectorizable conditional operation? */
8825 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8826 if (!stmt)
8827 return false;
8828
8829 code = gimple_assign_rhs_code (stmt);
8830
8831 if (code != COND_EXPR)
8832 return false;
8833
8834 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8835 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8836
8837 if (slp_node)
8838 ncopies = 1;
8839 else
8840 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8841
8842 gcc_assert (ncopies >= 1);
8843 if (for_reduction && ncopies > 1)
8844 return false; /* FORNOW */
8845
8846 cond_expr = gimple_assign_rhs1 (stmt);
8847 then_clause = gimple_assign_rhs2 (stmt);
8848 else_clause = gimple_assign_rhs3 (stmt);
8849
8850 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8851 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8852 || !comp_vectype)
8853 return false;
8854
8855 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8856 return false;
8857 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8858 return false;
8859
8860 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8861 return false;
8862
8863 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8864 return false;
8865
8866 masked = !COMPARISON_CLASS_P (cond_expr);
8867 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8868
8869 if (vec_cmp_type == NULL_TREE)
8870 return false;
8871
8872 cond_code = TREE_CODE (cond_expr);
8873 if (!masked)
8874 {
8875 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8876 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8877 }
8878
8879 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8880 {
8881 /* Boolean values may have another representation in vectors
8882 and therefore we prefer bit operations over comparison for
8883 them (which also works for scalar masks). We store opcodes
8884 to use in bitop1 and bitop2. Statement is vectorized as
8885 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8886 depending on bitop1 and bitop2 arity. */
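/* For example, with boolean operands a > b is emitted as a & ~b
   (bitop1 = BIT_NOT_EXPR on the second operand, bitop2 = BIT_AND_EXPR)
   and a != b as a ^ b (bitop1 = BIT_XOR_EXPR only). */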
8887 switch (cond_code)
8888 {
8889 case GT_EXPR:
8890 bitop1 = BIT_NOT_EXPR;
8891 bitop2 = BIT_AND_EXPR;
8892 break;
8893 case GE_EXPR:
8894 bitop1 = BIT_NOT_EXPR;
8895 bitop2 = BIT_IOR_EXPR;
8896 break;
8897 case LT_EXPR:
8898 bitop1 = BIT_NOT_EXPR;
8899 bitop2 = BIT_AND_EXPR;
8900 std::swap (cond_expr0, cond_expr1);
8901 break;
8902 case LE_EXPR:
8903 bitop1 = BIT_NOT_EXPR;
8904 bitop2 = BIT_IOR_EXPR;
8905 std::swap (cond_expr0, cond_expr1);
8906 break;
8907 case NE_EXPR:
8908 bitop1 = BIT_XOR_EXPR;
8909 break;
8910 case EQ_EXPR:
8911 bitop1 = BIT_XOR_EXPR;
8912 bitop2 = BIT_NOT_EXPR;
8913 break;
8914 default:
8915 return false;
8916 }
8917 cond_code = SSA_NAME;
8918 }
8919
8920 if (!vec_stmt)
8921 {
8922 if (bitop1 != NOP_EXPR)
8923 {
8924 machine_mode mode = TYPE_MODE (comp_vectype);
8925 optab optab;
8926
8927 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8928 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8929 return false;
8930
8931 if (bitop2 != NOP_EXPR)
8932 {
8933 optab = optab_for_tree_code (bitop2, comp_vectype,
8934 optab_default);
8935 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8936 return false;
8937 }
8938 }
8939 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8940 cond_code))
8941 {
8942 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8943 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8944 cost_vec);
8945 return true;
8946 }
8947 return false;
8948 }
8949
8950 /* Transform. */
8951
8952 if (!slp_node)
8953 {
8954 vec_oprnds0.create (1);
8955 vec_oprnds1.create (1);
8956 vec_oprnds2.create (1);
8957 vec_oprnds3.create (1);
8958 }
8959
8960 /* Handle def. */
8961 scalar_dest = gimple_assign_lhs (stmt);
8962 if (reduction_type != EXTRACT_LAST_REDUCTION)
8963 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8964
8965 /* Handle cond expr. */
8966 for (j = 0; j < ncopies; j++)
8967 {
8968 stmt_vec_info new_stmt_info = NULL;
8969 if (j == 0)
8970 {
8971 if (slp_node)
8972 {
8973 auto_vec<tree, 4> ops;
8974 auto_vec<vec<tree>, 4> vec_defs;
8975
8976 if (masked)
8977 ops.safe_push (cond_expr);
8978 else
8979 {
8980 ops.safe_push (cond_expr0);
8981 ops.safe_push (cond_expr1);
8982 }
8983 ops.safe_push (then_clause);
8984 ops.safe_push (else_clause);
8985 vect_get_slp_defs (ops, slp_node, &vec_defs);
8986 vec_oprnds3 = vec_defs.pop ();
8987 vec_oprnds2 = vec_defs.pop ();
8988 if (!masked)
8989 vec_oprnds1 = vec_defs.pop ();
8990 vec_oprnds0 = vec_defs.pop ();
8991 }
8992 else
8993 {
8994 if (masked)
8995 {
8996 vec_cond_lhs
8997 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
8998 comp_vectype);
8999 }
9000 else
9001 {
9002 vec_cond_lhs
9003 = vect_get_vec_def_for_operand (cond_expr0,
9004 stmt_info, comp_vectype);
9005 vec_cond_rhs
9006 = vect_get_vec_def_for_operand (cond_expr1,
9007 stmt_info, comp_vectype);
9008 }
9009 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
9010 stmt_info);
9011 if (reduction_type != EXTRACT_LAST_REDUCTION)
9012 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
9013 stmt_info);
9014 }
9015 }
9016 else
9017 {
9018 vec_cond_lhs
9019 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
9020 if (!masked)
9021 vec_cond_rhs
9022 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
9023
9024 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9025 vec_oprnds2.pop ());
9026 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
9027 vec_oprnds3.pop ());
9028 }
9029
9030 if (!slp_node)
9031 {
9032 vec_oprnds0.quick_push (vec_cond_lhs);
9033 if (!masked)
9034 vec_oprnds1.quick_push (vec_cond_rhs);
9035 vec_oprnds2.quick_push (vec_then_clause);
9036 vec_oprnds3.quick_push (vec_else_clause);
9037 }
9038
9039 /* Arguments are ready. Create the new vector stmt. */
9040 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9041 {
9042 vec_then_clause = vec_oprnds2[i];
9043 vec_else_clause = vec_oprnds3[i];
9044
9045 if (masked)
9046 vec_compare = vec_cond_lhs;
9047 else
9048 {
9049 vec_cond_rhs = vec_oprnds1[i];
9050 if (bitop1 == NOP_EXPR)
9051 vec_compare = build2 (cond_code, vec_cmp_type,
9052 vec_cond_lhs, vec_cond_rhs);
9053 else
9054 {
9055 new_temp = make_ssa_name (vec_cmp_type);
9056 gassign *new_stmt;
9057 if (bitop1 == BIT_NOT_EXPR)
9058 new_stmt = gimple_build_assign (new_temp, bitop1,
9059 vec_cond_rhs);
9060 else
9061 new_stmt
9062 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9063 vec_cond_rhs);
9064 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9065 if (bitop2 == NOP_EXPR)
9066 vec_compare = new_temp;
9067 else if (bitop2 == BIT_NOT_EXPR)
9068 {
9069 /* Instead of doing ~x ? y : z do x ? z : y. */
9070 vec_compare = new_temp;
9071 std::swap (vec_then_clause, vec_else_clause);
9072 }
9073 else
9074 {
9075 vec_compare = make_ssa_name (vec_cmp_type);
9076 new_stmt
9077 = gimple_build_assign (vec_compare, bitop2,
9078 vec_cond_lhs, new_temp);
9079 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9080 }
9081 }
9082 }
9083 if (reduction_type == EXTRACT_LAST_REDUCTION)
9084 {
9085 if (!is_gimple_val (vec_compare))
9086 {
9087 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9088 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9089 vec_compare);
9090 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9091 vec_compare = vec_compare_name;
9092 }
9093 gcall *new_stmt = gimple_build_call_internal
9094 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9095 vec_then_clause);
9096 gimple_call_set_lhs (new_stmt, scalar_dest);
9097 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9098 if (stmt_info->stmt == gsi_stmt (*gsi))
9099 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9100 else
9101 {
9102 /* In this case we're moving the definition to later in the
9103 block. That doesn't matter because the only uses of the
9104 lhs are in phi statements. */
9105 gimple_stmt_iterator old_gsi
9106 = gsi_for_stmt (stmt_info->stmt);
9107 gsi_remove (&old_gsi, true);
9108 new_stmt_info
9109 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9110 }
9111 }
9112 else
9113 {
9114 new_temp = make_ssa_name (vec_dest);
9115 gassign *new_stmt
9116 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9117 vec_then_clause, vec_else_clause);
9118 new_stmt_info
9119 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9120 }
9121 if (slp_node)
9122 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9123 }
9124
9125 if (slp_node)
9126 continue;
9127
9128 if (j == 0)
9129 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9130 else
9131 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9132
9133 prev_stmt_info = new_stmt_info;
9134 }
9135
9136 vec_oprnds0.release ();
9137 vec_oprnds1.release ();
9138 vec_oprnds2.release ();
9139 vec_oprnds3.release ();
9140
9141 return true;
9142 }
9143
9144 /* vectorizable_comparison.
9145
9146 Check if STMT_INFO is a comparison expression that can be vectorized.
9147 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9148 comparison, put it in VEC_STMT, and insert it at GSI.
9149
9150 Return true if STMT_INFO is vectorizable in this way. */
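/* As a sketch (names illustrative), the scalar mask-producing statement

     flag_1 = a_2 > b_3;

   is replaced by a vector comparison

     vect_flag = vect_a > vect_b;

   whose result is a boolean vector (mask) of type VECTYPE, possibly
   rewritten into bit operations as described below. */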
9151
9152 static bool
9153 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9154 stmt_vec_info *vec_stmt,
9155 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9156 {
9157 vec_info *vinfo = stmt_info->vinfo;
9158 tree lhs, rhs1, rhs2;
9159 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9160 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9161 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9162 tree new_temp;
9163 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9164 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9165 int ndts = 2;
9166 poly_uint64 nunits;
9167 int ncopies;
9168 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9169 stmt_vec_info prev_stmt_info = NULL;
9170 int i, j;
9171 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9172 vec<tree> vec_oprnds0 = vNULL;
9173 vec<tree> vec_oprnds1 = vNULL;
9174 tree mask_type;
9175 tree mask;
9176
9177 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9178 return false;
9179
9180 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9181 return false;
9182
9183 mask_type = vectype;
9184 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9185
9186 if (slp_node)
9187 ncopies = 1;
9188 else
9189 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9190
9191 gcc_assert (ncopies >= 1);
9192 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9193 return false;
9194
9195 if (STMT_VINFO_LIVE_P (stmt_info))
9196 {
9197 if (dump_enabled_p ())
9198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9199 "value used after loop.\n");
9200 return false;
9201 }
9202
9203 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9204 if (!stmt)
9205 return false;
9206
9207 code = gimple_assign_rhs_code (stmt);
9208
9209 if (TREE_CODE_CLASS (code) != tcc_comparison)
9210 return false;
9211
9212 rhs1 = gimple_assign_rhs1 (stmt);
9213 rhs2 = gimple_assign_rhs2 (stmt);
9214
9215 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9216 return false;
9217
9218 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9219 return false;
9220
9221 if (vectype1 && vectype2
9222 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9223 TYPE_VECTOR_SUBPARTS (vectype2)))
9224 return false;
9225
9226 vectype = vectype1 ? vectype1 : vectype2;
9227
9228 /* Invariant comparison. */
9229 if (!vectype)
9230 {
9231 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9232 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9233 return false;
9234 }
9235 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9236 return false;
9237
9238 /* Can't compare mask and non-mask types. */
9239 if (vectype1 && vectype2
9240 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9241 return false;
9242
9243 /* Boolean values may have another representation in vectors
9244 and therefore we prefer bit operations over comparison for
9245 them (which also works for scalar masks). We store opcodes
9246 to use in bitop1 and bitop2. Statement is vectorized as
9247 BITOP2 (rhs1 BITOP1 rhs2) or
9248 rhs1 BITOP2 (BITOP1 rhs2)
9249 depending on bitop1 and bitop2 arity. */
9250 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9251 {
9252 if (code == GT_EXPR)
9253 {
9254 bitop1 = BIT_NOT_EXPR;
9255 bitop2 = BIT_AND_EXPR;
9256 }
9257 else if (code == GE_EXPR)
9258 {
9259 bitop1 = BIT_NOT_EXPR;
9260 bitop2 = BIT_IOR_EXPR;
9261 }
9262 else if (code == LT_EXPR)
9263 {
9264 bitop1 = BIT_NOT_EXPR;
9265 bitop2 = BIT_AND_EXPR;
9266 std::swap (rhs1, rhs2);
9267 std::swap (dts[0], dts[1]);
9268 }
9269 else if (code == LE_EXPR)
9270 {
9271 bitop1 = BIT_NOT_EXPR;
9272 bitop2 = BIT_IOR_EXPR;
9273 std::swap (rhs1, rhs2);
9274 std::swap (dts[0], dts[1]);
9275 }
9276 else
9277 {
9278 bitop1 = BIT_XOR_EXPR;
9279 if (code == EQ_EXPR)
9280 bitop2 = BIT_NOT_EXPR;
9281 }
9282 }
9283
9284 if (!vec_stmt)
9285 {
9286 if (bitop1 == NOP_EXPR)
9287 {
9288 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9289 return false;
9290 }
9291 else
9292 {
9293 machine_mode mode = TYPE_MODE (vectype);
9294 optab optab;
9295
9296 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9297 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9298 return false;
9299
9300 if (bitop2 != NOP_EXPR)
9301 {
9302 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9303 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9304 return false;
9305 }
9306 }
9307
9308 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9309 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9310 dts, ndts, slp_node, cost_vec);
9311 return true;
9312 }
9313
9314 /* Transform. */
9315 if (!slp_node)
9316 {
9317 vec_oprnds0.create (1);
9318 vec_oprnds1.create (1);
9319 }
9320
9321 /* Handle def. */
9322 lhs = gimple_assign_lhs (stmt);
9323 mask = vect_create_destination_var (lhs, mask_type);
9324
9325 /* Handle cmp expr. */
9326 for (j = 0; j < ncopies; j++)
9327 {
9328 stmt_vec_info new_stmt_info = NULL;
9329 if (j == 0)
9330 {
9331 if (slp_node)
9332 {
9333 auto_vec<tree, 2> ops;
9334 auto_vec<vec<tree>, 2> vec_defs;
9335
9336 ops.safe_push (rhs1);
9337 ops.safe_push (rhs2);
9338 vect_get_slp_defs (ops, slp_node, &vec_defs);
9339 vec_oprnds1 = vec_defs.pop ();
9340 vec_oprnds0 = vec_defs.pop ();
9341 }
9342 else
9343 {
9344 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9345 vectype);
9346 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9347 vectype);
9348 }
9349 }
9350 else
9351 {
9352 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9353 vec_oprnds0.pop ());
9354 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9355 vec_oprnds1.pop ());
9356 }
9357
9358 if (!slp_node)
9359 {
9360 vec_oprnds0.quick_push (vec_rhs1);
9361 vec_oprnds1.quick_push (vec_rhs2);
9362 }
9363
9364 /* Arguments are ready. Create the new vector stmt. */
9365 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9366 {
9367 vec_rhs2 = vec_oprnds1[i];
9368
9369 new_temp = make_ssa_name (mask);
9370 if (bitop1 == NOP_EXPR)
9371 {
9372 gassign *new_stmt = gimple_build_assign (new_temp, code,
9373 vec_rhs1, vec_rhs2);
9374 new_stmt_info
9375 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9376 }
9377 else
9378 {
9379 gassign *new_stmt;
9380 if (bitop1 == BIT_NOT_EXPR)
9381 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9382 else
9383 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9384 vec_rhs2);
9385 new_stmt_info
9386 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9387 if (bitop2 != NOP_EXPR)
9388 {
9389 tree res = make_ssa_name (mask);
9390 if (bitop2 == BIT_NOT_EXPR)
9391 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9392 else
9393 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9394 new_temp);
9395 new_stmt_info
9396 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9397 }
9398 }
9399 if (slp_node)
9400 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9401 }
9402
9403 if (slp_node)
9404 continue;
9405
9406 if (j == 0)
9407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9408 else
9409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9410
9411 prev_stmt_info = new_stmt_info;
9412 }
9413
9414 vec_oprnds0.release ();
9415 vec_oprnds1.release ();
9416
9417 return true;
9418 }
9419
9420 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9421 can handle all live statements in the node. Otherwise return true
9422 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9423 GSI and VEC_STMT are as for vectorizable_live_operation. */
9424
9425 static bool
9426 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9427 slp_tree slp_node, stmt_vec_info *vec_stmt,
9428 stmt_vector_for_cost *cost_vec)
9429 {
9430 if (slp_node)
9431 {
9432 stmt_vec_info slp_stmt_info;
9433 unsigned int i;
9434 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9435 {
9436 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9437 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9438 vec_stmt, cost_vec))
9439 return false;
9440 }
9441 }
9442 else if (STMT_VINFO_LIVE_P (stmt_info)
9443 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9444 vec_stmt, cost_vec))
9445 return false;
9446
9447 return true;
9448 }
9449
9450 /* Make sure the statement is vectorizable. */
9451
9452 opt_result
9453 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9454 slp_tree node, slp_instance node_instance,
9455 stmt_vector_for_cost *cost_vec)
9456 {
9457 vec_info *vinfo = stmt_info->vinfo;
9458 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9459 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9460 bool ok;
9461 gimple_seq pattern_def_seq;
9462
9463 if (dump_enabled_p ())
9464 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
9465 stmt_info->stmt);
9466
9467 if (gimple_has_volatile_ops (stmt_info->stmt))
9468 return opt_result::failure_at (stmt_info->stmt,
9469 "not vectorized:"
9470 " stmt has volatile operands: %G\n",
9471 stmt_info->stmt);
9472
9473 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9474 && node == NULL
9475 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9476 {
9477 gimple_stmt_iterator si;
9478
9479 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9480 {
9481 stmt_vec_info pattern_def_stmt_info
9482 = vinfo->lookup_stmt (gsi_stmt (si));
9483 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9484 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9485 {
9486 /* Analyze def stmt of STMT if it's a pattern stmt. */
9487 if (dump_enabled_p ())
9488 dump_printf_loc (MSG_NOTE, vect_location,
9489 "==> examining pattern def statement: %G",
9490 pattern_def_stmt_info->stmt);
9491
9492 opt_result res
9493 = vect_analyze_stmt (pattern_def_stmt_info,
9494 need_to_vectorize, node, node_instance,
9495 cost_vec);
9496 if (!res)
9497 return res;
9498 }
9499 }
9500 }
9501
9502 /* Skip stmts that do not need to be vectorized. In loops this is expected
9503 to include:
9504 - the COND_EXPR which is the loop exit condition
9505 - any LABEL_EXPRs in the loop
9506 - computations that are used only for array indexing or loop control.
9507 In basic blocks we only analyze statements that are a part of some SLP
9508 instance, therefore, all the statements are relevant.
9509
9510 The pattern statement needs to be analyzed instead of the original
9511 statement if the original statement is not relevant. Otherwise, we
9512 analyze both statements. In basic blocks we are called from some SLP
9513 instance traversal, so don't analyze pattern stmts instead; the
9514 pattern stmts are already part of the SLP instance. */
9515
9516 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9517 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9518 && !STMT_VINFO_LIVE_P (stmt_info))
9519 {
9520 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9521 && pattern_stmt_info
9522 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9523 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9524 {
9525 /* Analyze PATTERN_STMT instead of the original stmt. */
9526 stmt_info = pattern_stmt_info;
9527 if (dump_enabled_p ())
9528 dump_printf_loc (MSG_NOTE, vect_location,
9529 "==> examining pattern statement: %G",
9530 stmt_info->stmt);
9531 }
9532 else
9533 {
9534 if (dump_enabled_p ())
9535 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9536
9537 return opt_result::success ();
9538 }
9539 }
9540 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9541 && node == NULL
9542 && pattern_stmt_info
9543 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9544 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9545 {
9546 /* Analyze PATTERN_STMT too. */
9547 if (dump_enabled_p ())
9548 dump_printf_loc (MSG_NOTE, vect_location,
9549 "==> examining pattern statement: %G",
9550 pattern_stmt_info->stmt);
9551
9552 opt_result res
9553 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9554 node_instance, cost_vec);
9555 if (!res)
9556 return res;
9557 }
9558
9559 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9560 {
9561 case vect_internal_def:
9562 break;
9563
9564 case vect_reduction_def:
9565 case vect_nested_cycle:
9566 gcc_assert (!bb_vinfo
9567 && (relevance == vect_used_in_outer
9568 || relevance == vect_used_in_outer_by_reduction
9569 || relevance == vect_used_by_reduction
9570 || relevance == vect_unused_in_scope
9571 || relevance == vect_used_only_live));
9572 break;
9573
9574 case vect_induction_def:
9575 gcc_assert (!bb_vinfo);
9576 break;
9577
9578 case vect_constant_def:
9579 case vect_external_def:
9580 case vect_unknown_def_type:
9581 default:
9582 gcc_unreachable ();
9583 }
9584
9585 if (STMT_VINFO_RELEVANT_P (stmt_info))
9586 {
9587 tree type = gimple_expr_type (stmt_info->stmt);
9588 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9589 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9590 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9591 || (call && gimple_call_lhs (call) == NULL_TREE));
9592 *need_to_vectorize = true;
9593 }
9594
9595 if (PURE_SLP_STMT (stmt_info) && !node)
9596 {
9597 if (dump_enabled_p ())
9598 dump_printf_loc (MSG_NOTE, vect_location,
9599 "handled only by SLP analysis\n");
9600 return opt_result::success ();
9601 }
9602
9603 ok = true;
9604 if (!bb_vinfo
9605 && (STMT_VINFO_RELEVANT_P (stmt_info)
9606 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9607 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9608 -mveclibabi= takes preference over library functions with
9609 the simd attribute. */
9610 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9611 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9612 cost_vec)
9613 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9614 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9615 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9616 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9617 cost_vec)
9618 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9619 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9620 node_instance, cost_vec)
9621 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9622 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9623 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9624 cost_vec)
9625 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9626 cost_vec));
9627 else
9628 {
9629 if (bb_vinfo)
9630 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9631 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9632 cost_vec)
9633 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9634 cost_vec)
9635 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9636 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9637 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9638 cost_vec)
9639 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9640 cost_vec)
9641 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9642 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
9643 cost_vec)
9644 || vectorizable_comparison (stmt_info, NULL, NULL, node,
9645 cost_vec));
9646 }
9647
9648 if (!ok)
9649 return opt_result::failure_at (stmt_info->stmt,
9650 "not vectorized:"
9651 " relevant stmt not supported: %G",
9652 stmt_info->stmt);
9653
9654 /* Stmts that are (also) "live" (i.e. used outside of the loop)
9655 need extra handling, except for vectorizable reductions. */
9656 if (!bb_vinfo
9657 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9658 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9659 return opt_result::failure_at (stmt_info->stmt,
9660 "not vectorized:"
9661 " live stmt not supported: %G",
9662 stmt_info->stmt);
9663
9664 return opt_result::success ();
9665 }
9666
9667
9668 /* Function vect_transform_stmt.
9669
9670 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
9671
9672 bool
9673 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9674 slp_tree slp_node, slp_instance slp_node_instance)
9675 {
9676 vec_info *vinfo = stmt_info->vinfo;
9677 bool is_store = false;
9678 stmt_vec_info vec_stmt = NULL;
9679 bool done;
9680
9681 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9682 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9683
9684 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9685 && nested_in_vect_loop_p
9686 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9687 stmt_info));
9688
9689 gimple *stmt = stmt_info->stmt;
9690 switch (STMT_VINFO_TYPE (stmt_info))
9691 {
9692 case type_demotion_vec_info_type:
9693 case type_promotion_vec_info_type:
9694 case type_conversion_vec_info_type:
9695 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9696 NULL);
9697 gcc_assert (done);
9698 break;
9699
9700 case induc_vec_info_type:
9701 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9702 NULL);
9703 gcc_assert (done);
9704 break;
9705
9706 case shift_vec_info_type:
9707 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9708 gcc_assert (done);
9709 break;
9710
9711 case op_vec_info_type:
9712 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9713 NULL);
9714 gcc_assert (done);
9715 break;
9716
9717 case assignment_vec_info_type:
9718 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9719 NULL);
9720 gcc_assert (done);
9721 break;
9722
9723 case load_vec_info_type:
9724 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9725 slp_node_instance, NULL);
9726 gcc_assert (done);
9727 break;
9728
9729 case store_vec_info_type:
9730 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9731 gcc_assert (done);
9732 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9733 {
9734 /* In case of interleaving, the whole chain is vectorized when the
9735 last store in the chain is reached. Store stmts before the last
9736 one are skipped, and their vec_stmt_info shouldn't be freed
9737 meanwhile. */
9738 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9739 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9740 is_store = true;
9741 }
9742 else
9743 is_store = true;
9744 break;
9745
9746 case condition_vec_info_type:
9747 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
9748 slp_node, NULL);
9749 gcc_assert (done);
9750 break;
9751
9752 case comparison_vec_info_type:
9753 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
9754 slp_node, NULL);
9755 gcc_assert (done);
9756 break;
9757
9758 case call_vec_info_type:
9759 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9760 stmt = gsi_stmt (*gsi);
9761 break;
9762
9763 case call_simd_clone_vec_info_type:
9764 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9765 slp_node, NULL);
9766 stmt = gsi_stmt (*gsi);
9767 break;
9768
9769 case reduc_vec_info_type:
9770 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9771 slp_node_instance, NULL);
9772 gcc_assert (done);
9773 break;
9774
9775 default:
9776 if (!STMT_VINFO_LIVE_P (stmt_info))
9777 {
9778 if (dump_enabled_p ())
9779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9780 "stmt not supported.\n");
9781 gcc_unreachable ();
9782 }
9783 }
9784
9785 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9786 This would break hybrid SLP vectorization. */
9787 if (slp_node)
9788 gcc_assert (!vec_stmt
9789 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9790
9791 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9792 is being vectorized, but outside the immediately enclosing loop. */
9793 if (vec_stmt
9794 && nested_p
9795 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9796 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9797 || STMT_VINFO_RELEVANT (stmt_info) ==
9798 vect_used_in_outer_by_reduction))
9799 {
9800 struct loop *innerloop = LOOP_VINFO_LOOP (
9801 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9802 imm_use_iterator imm_iter;
9803 use_operand_p use_p;
9804 tree scalar_dest;
9805
9806 if (dump_enabled_p ())
9807 dump_printf_loc (MSG_NOTE, vect_location,
9808 "Record the vdef for outer-loop vectorization.\n");
9809
9810 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9811 (to be used when vectorizing outer-loop stmts that use the DEF of
9812 STMT). */
9813 if (gimple_code (stmt) == GIMPLE_PHI)
9814 scalar_dest = PHI_RESULT (stmt);
9815 else
9816 scalar_dest = gimple_get_lhs (stmt);
9817
9818 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9819 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9820 {
9821 stmt_vec_info exit_phi_info
9822 = vinfo->lookup_stmt (USE_STMT (use_p));
9823 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9824 }
9825 }
9826
9827 /* Handle stmts whose DEF is used outside the loop-nest that is
9828 being vectorized. */
9829 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9830 {
9831 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9832 NULL);
9833 gcc_assert (done);
9834 }
9835
9836 if (vec_stmt)
9837 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9838
9839 return is_store;
9840 }
9841
9842
9843 /* Remove a group of stores (for SLP or interleaving), free their
9844 stmt_vec_info. */
9845
9846 void
9847 vect_remove_stores (stmt_vec_info first_stmt_info)
9848 {
9849 vec_info *vinfo = first_stmt_info->vinfo;
9850 stmt_vec_info next_stmt_info = first_stmt_info;
9851
9852 while (next_stmt_info)
9853 {
9854 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9855 next_stmt_info = vect_orig_stmt (next_stmt_info);
9856 /* Free the attached stmt_vec_info and remove the stmt. */
9857 vinfo->remove_stmt (next_stmt_info);
9858 next_stmt_info = tmp;
9859 }
9860 }
9861
9862 /* Function get_vectype_for_scalar_type_and_size.
9863
9864 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9865 by the target. */
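/* For example, on a target whose preferred vector size is 16 bytes,
   a 4-byte integer SCALAR_TYPE with SIZE 16 yields a 4-element integer
   vector type; SIZE 0 means "use the vector mode the target prefers". */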
9866
9867 tree
9868 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9869 {
9870 tree orig_scalar_type = scalar_type;
9871 scalar_mode inner_mode;
9872 machine_mode simd_mode;
9873 poly_uint64 nunits;
9874 tree vectype;
9875
9876 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9877 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9878 return NULL_TREE;
9879
9880 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9881
9882 /* For vector types of elements whose mode precision doesn't
9883 match their type's precision we use an element type of mode
9884 precision. The vectorization routines will have to make sure
9885 they support the proper result truncation/extension.
9886 We also make sure to build vector types with INTEGER_TYPE
9887 component type only. */
9888 if (INTEGRAL_TYPE_P (scalar_type)
9889 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9890 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9891 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9892 TYPE_UNSIGNED (scalar_type));
9893
9894 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9895 When the component mode passes the above test simply use a type
9896 corresponding to that mode. The theory is that any use that
9897 would cause problems with this will disable vectorization anyway. */
9898 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9899 && !INTEGRAL_TYPE_P (scalar_type))
9900 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9901
9902 /* We can't build a vector type of elements with alignment bigger than
9903 their size. */
9904 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9905 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9906 TYPE_UNSIGNED (scalar_type));
9907
9908 /* If we fell back to using the mode, fail if there was
9909 no scalar type for it. */
9910 if (scalar_type == NULL_TREE)
9911 return NULL_TREE;
9912
9913 /* If no size was supplied use the mode the target prefers. Otherwise
9914 look up a vector mode of the specified size. */
9915 if (known_eq (size, 0U))
9916 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9917 else if (!multiple_p (size, nbytes, &nunits)
9918 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9919 return NULL_TREE;
9920 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9921 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9922 return NULL_TREE;
9923
9924 vectype = build_vector_type (scalar_type, nunits);
9925
9926 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9927 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9928 return NULL_TREE;
9929
9930 /* Re-attach the address-space qualifier if we canonicalized the scalar
9931 type. */
9932 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9933 return build_qualified_type
9934 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9935
9936 return vectype;
9937 }
9938
9939 poly_uint64 current_vector_size;
9940
9941 /* Function get_vectype_for_scalar_type.
9942
9943 Returns the vector type corresponding to SCALAR_TYPE as supported
9944 by the target. */
9945
9946 tree
9947 get_vectype_for_scalar_type (tree scalar_type)
9948 {
9949 tree vectype;
9950 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9951 current_vector_size);
9952 if (vectype
9953 && known_eq (current_vector_size, 0U))
9954 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9955 return vectype;
9956 }
9957
9958 /* Function get_mask_type_for_scalar_type.
9959
9960 Returns the mask type corresponding to a result of comparison
9961 of vectors of specified SCALAR_TYPE as supported by target. */
9962
9963 tree
9964 get_mask_type_for_scalar_type (tree scalar_type)
9965 {
9966 tree vectype = get_vectype_for_scalar_type (scalar_type);
9967
9968 if (!vectype)
9969 return NULL;
9970
9971 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9972 current_vector_size);
9973 }
9974
9975 /* Function get_same_sized_vectype
9976
9977 Returns a vector type corresponding to SCALAR_TYPE of size
9978 VECTOR_TYPE if supported by the target. */
9979
9980 tree
9981 get_same_sized_vectype (tree scalar_type, tree vector_type)
9982 {
9983 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9984 return build_same_sized_truth_vector_type (vector_type);
9985
9986 return get_vectype_for_scalar_type_and_size
9987 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9988 }
9989
9990 /* Function vect_is_simple_use.
9991
9992 Input:
9993 VINFO - the vect info of the loop or basic block that is being vectorized.
9994 OPERAND - operand in the loop or bb.
9995 Output:
9996 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
9997 case OPERAND is an SSA_NAME that is defined in the vectorizable region
9998 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
9999 the definition could be anywhere in the function
10000 DT - the type of definition
10001
10002 Returns whether a stmt with OPERAND can be vectorized.
10003 For loops, supportable operands are constants, loop invariants, and operands
10004 that are defined by the current iteration of the loop. Unsupportable
10005 operands are those that are defined by a previous iteration of the loop (as
10006 is the case in reduction/induction computations).
10007 For basic blocks, supportable operands are constants and bb invariants.
10008 For now, operands defined outside the basic block are not supported. */
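/* A minimal caller sketch (names illustrative):

     enum vect_def_type dt;
     stmt_vec_info def_info;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info))
       return false;

   after which DT describes the definition and DEF_INFO, if the operand
   is defined inside the vectorizable region, its defining statement. */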
10009
10010 bool
10011 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10012 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10013 {
10014 if (def_stmt_info_out)
10015 *def_stmt_info_out = NULL;
10016 if (def_stmt_out)
10017 *def_stmt_out = NULL;
10018 *dt = vect_unknown_def_type;
10019
10020 if (dump_enabled_p ())
10021 {
10022 dump_printf_loc (MSG_NOTE, vect_location,
10023 "vect_is_simple_use: operand ");
10024 if (TREE_CODE (operand) == SSA_NAME
10025 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10026 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10027 else
10028 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10029 }
10030
10031 if (CONSTANT_CLASS_P (operand))
10032 *dt = vect_constant_def;
10033 else if (is_gimple_min_invariant (operand))
10034 *dt = vect_external_def;
10035 else if (TREE_CODE (operand) != SSA_NAME)
10036 *dt = vect_unknown_def_type;
10037 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10038 *dt = vect_external_def;
10039 else
10040 {
10041 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10042 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10043 if (!stmt_vinfo)
10044 *dt = vect_external_def;
10045 else
10046 {
10047 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
10048 def_stmt = stmt_vinfo->stmt;
10049 switch (gimple_code (def_stmt))
10050 {
10051 case GIMPLE_PHI:
10052 case GIMPLE_ASSIGN:
10053 case GIMPLE_CALL:
10054 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10055 break;
10056 default:
10057 *dt = vect_unknown_def_type;
10058 break;
10059 }
10060 if (def_stmt_info_out)
10061 *def_stmt_info_out = stmt_vinfo;
10062 }
10063 if (def_stmt_out)
10064 *def_stmt_out = def_stmt;
10065 }
10066
10067 if (dump_enabled_p ())
10068 {
10069 dump_printf (MSG_NOTE, ", type of def: ");
10070 switch (*dt)
10071 {
10072 case vect_uninitialized_def:
10073 dump_printf (MSG_NOTE, "uninitialized\n");
10074 break;
10075 case vect_constant_def:
10076 dump_printf (MSG_NOTE, "constant\n");
10077 break;
10078 case vect_external_def:
10079 dump_printf (MSG_NOTE, "external\n");
10080 break;
10081 case vect_internal_def:
10082 dump_printf (MSG_NOTE, "internal\n");
10083 break;
10084 case vect_induction_def:
10085 dump_printf (MSG_NOTE, "induction\n");
10086 break;
10087 case vect_reduction_def:
10088 dump_printf (MSG_NOTE, "reduction\n");
10089 break;
10090 case vect_double_reduction_def:
10091 dump_printf (MSG_NOTE, "double reduction\n");
10092 break;
10093 case vect_nested_cycle:
10094 dump_printf (MSG_NOTE, "nested cycle\n");
10095 break;
10096 case vect_unknown_def_type:
10097 dump_printf (MSG_NOTE, "unknown\n");
10098 break;
10099 }
10100 }
10101
10102 if (*dt == vect_unknown_def_type)
10103 {
10104 if (dump_enabled_p ())
10105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10106 "Unsupported pattern.\n");
10107 return false;
10108 }
10109
10110 return true;
10111 }
10112
10113 /* Function vect_is_simple_use.
10114
10115 Same as vect_is_simple_use but also determines the vector operand
10116 type of OPERAND and stores it to *VECTYPE. If the definition of
10117 OPERAND is vect_uninitialized_def, vect_constant_def or
10118 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10119 is responsible to compute the best suited vector type for the
10120 scalar operand. */
10121
10122 bool
10123 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10124 tree *vectype, stmt_vec_info *def_stmt_info_out,
10125 gimple **def_stmt_out)
10126 {
10127 stmt_vec_info def_stmt_info;
10128 gimple *def_stmt;
10129 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10130 return false;
10131
10132 if (def_stmt_out)
10133 *def_stmt_out = def_stmt;
10134 if (def_stmt_info_out)
10135 *def_stmt_info_out = def_stmt_info;
10136
10137 /* Now get a vector type if the def is internal, otherwise supply
10138 NULL_TREE and leave it up to the caller to figure out a proper
10139 type for the use stmt. */
10140 if (*dt == vect_internal_def
10141 || *dt == vect_induction_def
10142 || *dt == vect_reduction_def
10143 || *dt == vect_double_reduction_def
10144 || *dt == vect_nested_cycle)
10145 {
10146 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10147 gcc_assert (*vectype != NULL_TREE);
10148 if (dump_enabled_p ())
10149 dump_printf_loc (MSG_NOTE, vect_location,
10150 "vect_is_simple_use: vectype %T\n", *vectype);
10151 }
10152 else if (*dt == vect_uninitialized_def
10153 || *dt == vect_constant_def
10154 || *dt == vect_external_def)
10155 *vectype = NULL_TREE;
10156 else
10157 gcc_unreachable ();
10158
10159 return true;
10160 }
10161
10162
10163 /* Function supportable_widening_operation
10164
10165 Check whether an operation represented by the code CODE is a
10166 widening operation that is supported by the target platform in
10167 vector form (i.e., when operating on arguments of type VECTYPE_IN
10168 producing a result of type VECTYPE_OUT).
10169
10170 Widening operations we currently support are NOP (CONVERT), FLOAT,
10171 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10172 are supported by the target platform either directly (via vector
10173 tree-codes), or via target builtins.
10174
10175 Output:
10176 - CODE1 and CODE2 are codes of vector operations to be used when
10177 vectorizing the operation, if available.
10178 - MULTI_STEP_CVT determines the number of required intermediate steps in
10179 case of multi-step conversion (like char->short->int - in that case
10180 MULTI_STEP_CVT will be 1).
10181 - INTERM_TYPES contains the intermediate type required to perform the
10182 widening operation (short in the above example). */
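/* For example, a char->int conversion on a little-endian target with
   128-bit vectors comes back with VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR
   in *CODE1/*CODE2, *MULTI_STEP_CVT set to 1 and the short vector type
   recorded in INTERM_TYPES, mirroring the char->short->int example
   above. */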
10183
10184 bool
10185 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10186 tree vectype_out, tree vectype_in,
10187 enum tree_code *code1, enum tree_code *code2,
10188 int *multi_step_cvt,
10189 vec<tree> *interm_types)
10190 {
10191 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10192 struct loop *vect_loop = NULL;
10193 machine_mode vec_mode;
10194 enum insn_code icode1, icode2;
10195 optab optab1, optab2;
10196 tree vectype = vectype_in;
10197 tree wide_vectype = vectype_out;
10198 enum tree_code c1, c2;
10199 int i;
10200 tree prev_type, intermediate_type;
10201 machine_mode intermediate_mode, prev_mode;
10202 optab optab3, optab4;
10203
10204 *multi_step_cvt = 0;
10205 if (loop_info)
10206 vect_loop = LOOP_VINFO_LOOP (loop_info);
10207
10208 switch (code)
10209 {
10210 case WIDEN_MULT_EXPR:
10211 /* The result of a vectorized widening operation usually requires
10212 two vectors (because the widened results do not fit into one vector).
10213 The generated vector results would normally be expected to be
10214 generated in the same order as in the original scalar computation,
10215 i.e. if 8 results are generated in each vector iteration, they are
10216 to be organized as follows:
10217 vect1: [res1,res2,res3,res4],
10218 vect2: [res5,res6,res7,res8].
10219
10220 However, in the special case that the result of the widening
10221 operation is used in a reduction computation only, the order doesn't
10222 matter (because when vectorizing a reduction we change the order of
10223 the computation). Some targets can take advantage of this and
10224 generate more efficient code. For example, targets like Altivec,
10225 that support widen_mult using a sequence of {mult_even,mult_odd}
10226 generate the following vectors:
10227 vect1: [res1,res3,res5,res7],
10228 vect2: [res2,res4,res6,res8].
10229
10230 When vectorizing outer-loops, we execute the inner-loop sequentially
10231 (each vectorized inner-loop iteration contributes to VF outer-loop
10232 iterations in parallel). We therefore don't allow changing the
10233 order of the computation in the inner-loop during outer-loop
10234 vectorization. */
10235 /* TODO: Another case in which order doesn't *really* matter is when we
10236 widen and then contract again, e.g. (short)((int)x * y >> 8).
10237 Normally, pack_trunc performs an even/odd permute, whereas the
10238 repack from an even/odd expansion would be an interleave, which
10239 would be significantly simpler for e.g. AVX2. */
10240 /* In any case, in order to avoid duplicating the code below, recurse
10241 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10242 are properly set up for the caller. If we fail, we'll continue with
10243 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10244 if (vect_loop
10245 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10246 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10247 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10248 stmt_info, vectype_out,
10249 vectype_in, code1, code2,
10250 multi_step_cvt, interm_types))
10251 {
10252 /* Elements in a vector with vect_used_by_reduction property cannot
10253 be reordered if the use chain with this property does not have the
10254 same operation. One such example is s += a * b, where elements
10255 in a and b cannot be reordered. Here we check if the vector defined
10256 by STMT is only directly used in the reduction statement. */
10257 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10258 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10259 if (use_stmt_info
10260 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10261 return true;
10262 }
10263 c1 = VEC_WIDEN_MULT_LO_EXPR;
10264 c2 = VEC_WIDEN_MULT_HI_EXPR;
10265 break;
10266
10267 case DOT_PROD_EXPR:
10268 c1 = DOT_PROD_EXPR;
10269 c2 = DOT_PROD_EXPR;
10270 break;
10271
10272 case SAD_EXPR:
10273 c1 = SAD_EXPR;
10274 c2 = SAD_EXPR;
10275 break;
10276
10277 case VEC_WIDEN_MULT_EVEN_EXPR:
10278 /* Support the recursion induced just above. */
10279 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10280 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10281 break;
10282
10283 case WIDEN_LSHIFT_EXPR:
10284 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10285 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10286 break;
10287
10288 CASE_CONVERT:
10289 c1 = VEC_UNPACK_LO_EXPR;
10290 c2 = VEC_UNPACK_HI_EXPR;
10291 break;
10292
10293 case FLOAT_EXPR:
10294 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10295 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10296 break;
10297
10298 case FIX_TRUNC_EXPR:
10299 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10300 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10301 break;
10302
10303 default:
10304 gcc_unreachable ();
10305 }
10306
10307 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10308 std::swap (c1, c2);
10309
10310 if (code == FIX_TRUNC_EXPR)
10311 {
10312 /* The signedness is determined from output operand. */
10313 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10314 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10315 }
10316 else if (CONVERT_EXPR_CODE_P (code)
10317 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
10318 && VECTOR_BOOLEAN_TYPE_P (vectype)
10319 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
10320 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10321 {
10322 /* If the input and result modes are the same, a different optab
10323 is needed where we pass in the number of units in vectype. */
10324 optab1 = vec_unpacks_sbool_lo_optab;
10325 optab2 = vec_unpacks_sbool_hi_optab;
10326 }
10327 else
10328 {
10329 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10330 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10331 }
10332
10333 if (!optab1 || !optab2)
10334 return false;
10335
10336 vec_mode = TYPE_MODE (vectype);
10337 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10338 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10339 return false;
10340
10341 *code1 = c1;
10342 *code2 = c2;
10343
10344 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10345 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10346 {
10347 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10348 return true;
10349 /* For scalar masks we may have different boolean
10350 vector types having the same QImode. Thus we
10351 add an additional check on the number of elements. */
10352 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10353 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10354 return true;
10355 }
10356
10357 /* Check if it's a multi-step conversion that can be done using intermediate
10358 types. */
10359
10360 prev_type = vectype;
10361 prev_mode = vec_mode;
10362
10363 if (!CONVERT_EXPR_CODE_P (code))
10364 return false;
10365
10366 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10367 intermediate steps in the promotion sequence. We try
10368 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10369 not. */
10370 interm_types->create (MAX_INTERM_CVT_STEPS);
10371 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10372 {
10373 intermediate_mode = insn_data[icode1].operand[0].mode;
10374 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10375 {
10376 intermediate_type = vect_halve_mask_nunits (prev_type);
10377 if (intermediate_mode != TYPE_MODE (intermediate_type))
10378 return false;
10379 }
10380 else
10381 intermediate_type
10382 = lang_hooks.types.type_for_mode (intermediate_mode,
10383 TYPE_UNSIGNED (prev_type));
10384
10385 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10386 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10387 && intermediate_mode == prev_mode
10388 && SCALAR_INT_MODE_P (prev_mode))
10389 {
10390 /* If the input and result modes are the same, a different optab
10391 is needed where we pass in the number of units in vectype. */
10392 optab3 = vec_unpacks_sbool_lo_optab;
10393 optab4 = vec_unpacks_sbool_hi_optab;
10394 }
10395 else
10396 {
10397 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10398 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10399 }
10400
10401 if (!optab3 || !optab4
10402 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10403 || insn_data[icode1].operand[0].mode != intermediate_mode
10404 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10405 || insn_data[icode2].operand[0].mode != intermediate_mode
10406 || ((icode1 = optab_handler (optab3, intermediate_mode))
10407 == CODE_FOR_nothing)
10408 || ((icode2 = optab_handler (optab4, intermediate_mode))
10409 == CODE_FOR_nothing))
10410 break;
10411
10412 interm_types->quick_push (intermediate_type);
10413 (*multi_step_cvt)++;
10414
10415 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10416 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10417 {
10418 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10419 return true;
10420 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10421 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
10422 return true;
10423 }
10424
10425 prev_type = intermediate_type;
10426 prev_mode = intermediate_mode;
10427 }
10428
10429 interm_types->release ();
10430 return false;
10431 }
10432
10433
10434 /* Function supportable_narrowing_operation
10435
10436 Check whether an operation represented by the code CODE is a
10437 narrowing operation that is supported by the target platform in
10438 vector form (i.e., when operating on arguments of type VECTYPE_IN
10439 and producing a result of type VECTYPE_OUT).
10440
10441 The narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10442 and FLOAT. This function checks whether these operations are supported by
10443 the target platform directly via vector tree codes.
10444
10445 Output:
10446 - CODE1 is the code of a vector operation to be used when
10447 vectorizing the operation, if available.
10448 - MULTI_STEP_CVT determines the number of required intermediate steps in
10449 case of multi-step conversion (like int->short->char - in that case
10450 MULTI_STEP_CVT will be 1).
10451 - INTERM_TYPES contains the intermediate type required to perform the
10452 narrowing operation (short in the above example). */
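/* A hedged caller sketch (illustrative only; the variable names are
   assumptions, and the real callers are e.g. vectorizable_conversion
   earlier in this file):

     enum tree_code code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;
     if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
                                          &code1, &multi_step_cvt,
                                          &interm_types))
       {
         ... emit MULTI_STEP_CVT + 1 rounds of CODE1 statements, walking
             through the vector types recorded in INTERM_TYPES ...
       }
     interm_types.release (); */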
10453
10454 bool
10455 supportable_narrowing_operation (enum tree_code code,
10456 tree vectype_out, tree vectype_in,
10457 enum tree_code *code1, int *multi_step_cvt,
10458 vec<tree> *interm_types)
10459 {
10460 machine_mode vec_mode;
10461 enum insn_code icode1;
10462 optab optab1, interm_optab;
10463 tree vectype = vectype_in;
10464 tree narrow_vectype = vectype_out;
10465 enum tree_code c1;
10466 tree intermediate_type, prev_type;
10467 machine_mode intermediate_mode, prev_mode;
10468 int i;
10469 bool uns;
10470
10471 *multi_step_cvt = 0;
10472 switch (code)
10473 {
10474 CASE_CONVERT:
10475 c1 = VEC_PACK_TRUNC_EXPR;
10476 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
10477 && VECTOR_BOOLEAN_TYPE_P (vectype)
10478 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
10479 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
10480 optab1 = vec_pack_sbool_trunc_optab;
10481 else
10482 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10483 break;
10484
10485 case FIX_TRUNC_EXPR:
10486 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10487 /* The signedness is determined from the output operand. */
10488 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10489 break;
10490
10491 case FLOAT_EXPR:
10492 c1 = VEC_PACK_FLOAT_EXPR;
10493 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10494 break;
10495
10496 default:
10497 gcc_unreachable ();
10498 }
10499
10500 if (!optab1)
10501 return false;
10502
10503 vec_mode = TYPE_MODE (vectype);
10504 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10505 return false;
10506
10507 *code1 = c1;
10508
10509 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10510 {
10511 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10512 return true;
10513 /* For scalar masks we may have different boolean
10514 vector types with the same QImode. Thus we
10515 add an additional check on the number of elements. */
10516 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10517 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10518 return true;
10519 }
10520
10521 if (code == FLOAT_EXPR)
10522 return false;
10523
10524 /* Check if it's a multi-step conversion that can be done using intermediate
10525 types. */
10526 prev_mode = vec_mode;
10527 prev_type = vectype;
10528 if (code == FIX_TRUNC_EXPR)
10529 uns = TYPE_UNSIGNED (vectype_out);
10530 else
10531 uns = TYPE_UNSIGNED (vectype);
10532
10533 /* For a multi-step FIX_TRUNC_EXPR, prefer a signed float-to-integer
10534 conversion over an unsigned one, as an unsigned FIX_TRUNC_EXPR is often
10535 more costly than the signed variant. */
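/* For instance (target-dependent, given as an assumption): when a
   double -> unsigned short narrowing needs more than one step, the
   signed vec_pack_sfix_trunc pattern is tried for the first step and,
   if it produces the same result mode, used instead of the unsigned
   vec_pack_ufix_trunc variant. */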
10536 if (code == FIX_TRUNC_EXPR && uns)
10537 {
10538 enum insn_code icode2;
10539
10540 intermediate_type
10541 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10542 interm_optab
10543 = optab_for_tree_code (c1, intermediate_type, optab_default);
10544 if (interm_optab != unknown_optab
10545 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10546 && insn_data[icode1].operand[0].mode
10547 == insn_data[icode2].operand[0].mode)
10548 {
10549 uns = false;
10550 optab1 = interm_optab;
10551 icode1 = icode2;
10552 }
10553 }
10554
10555 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10556 intermediate steps in the narrowing sequence. We try
10557 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
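/* A worked example, assuming 128-bit vectors: narrowing "int" to "char"
   packs pairs of V4SI vectors into V8HI vectors with VEC_PACK_TRUNC_EXPR
   and then pairs of V8HI vectors into V16QI vectors, so INTERM_TYPES
   ends up holding the V8HI vector type and *MULTI_STEP_CVT becomes 1. */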
10558 interm_types->create (MAX_INTERM_CVT_STEPS);
10559 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10560 {
10561 intermediate_mode = insn_data[icode1].operand[0].mode;
10562 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10563 {
10564 intermediate_type = vect_double_mask_nunits (prev_type);
10565 if (intermediate_mode != TYPE_MODE (intermediate_type))
10566 return false;
10567 }
10568 else
10569 intermediate_type
10570 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10571 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
10572 && VECTOR_BOOLEAN_TYPE_P (prev_type)
10573 && intermediate_mode == prev_mode
10574 && SCALAR_INT_MODE_P (prev_mode))
10575 interm_optab = vec_pack_sbool_trunc_optab;
10576 else
10577 interm_optab
10578 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10579 optab_default);
10580 if (!interm_optab
10581 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10582 || insn_data[icode1].operand[0].mode != intermediate_mode
10583 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10584 == CODE_FOR_nothing))
10585 break;
10586
10587 interm_types->quick_push (intermediate_type);
10588 (*multi_step_cvt)++;
10589
10590 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10591 {
10592 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
10593 return true;
10594 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10595 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
10596 return true;
10597 }
10598
10599 prev_mode = intermediate_mode;
10600 prev_type = intermediate_type;
10601 optab1 = interm_optab;
10602 }
10603
10604 interm_types->release ();
10605 return false;
10606 }
10607
10608 /* Generate and return a statement that sets vector mask MASK such that
10609 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
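/* For example, with START_INDEX 6, END_INDEX 9 and a 4-lane MASK the
   IFN_WHILE_ULT call below yields { true, true, true, false }, because
   6 + 3 is the first index that is not below 9. */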
10610
10611 gcall *
10612 vect_gen_while (tree mask, tree start_index, tree end_index)
10613 {
10614 tree cmp_type = TREE_TYPE (start_index);
10615 tree mask_type = TREE_TYPE (mask);
10616 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10617 cmp_type, mask_type,
10618 OPTIMIZE_FOR_SPEED));
10619 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10620 start_index, end_index,
10621 build_zero_cst (mask_type));
10622 gimple_call_set_lhs (call, mask);
10623 return call;
10624 }
10625
10626 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10627 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10628
10629 tree
10630 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10631 tree end_index)
10632 {
10633 tree tmp = make_ssa_name (mask_type);
10634 gcall *call = vect_gen_while (tmp, start_index, end_index);
10635 gimple_seq_add_stmt (seq, call);
10636 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10637 }
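/* A minimal usage sketch (hypothetical; everything except the two helpers
   above is an assumed caller-side name): emit the mask of inactive lanes
   just before the statement at GSI that consumes it.

     gimple_seq seq = NULL;
     tree inactive_lanes
       = vect_gen_while_not (&seq, mask_type, start_index, end_index);
     gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); */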
10638
10639 /* Try to compute the vector types required to vectorize STMT_INFO,
10640 returning true on success and false if vectorization isn't possible.
10641
10642 On success:
10643
10644 - Set *STMT_VECTYPE_OUT to:
10645 - NULL_TREE if the statement doesn't need to be vectorized;
10646 - boolean_type_node if the statement is a boolean operation whose
10647 vector type can only be determined once all the other vector types
10648 are known; and
10649 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10650
10651 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10652 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10653 statement does not help to determine the overall number of units. */
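/* A hedged caller sketch (roughly how the vectorization-factor
   computation in tree-vect-loop.c consumes the two outputs; treat the
   details as illustrative rather than authoritative):

     tree stmt_vectype, nunits_vectype;
     opt_result res
       = vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
                                         &nunits_vectype);
     if (!res)
       return res; */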
10654
10655 opt_result
10656 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10657 tree *stmt_vectype_out,
10658 tree *nunits_vectype_out)
10659 {
10660 gimple *stmt = stmt_info->stmt;
10661
10662 *stmt_vectype_out = NULL_TREE;
10663 *nunits_vectype_out = NULL_TREE;
10664
10665 if (gimple_get_lhs (stmt) == NULL_TREE
10666 /* MASK_STORE has no lhs, but is ok. */
10667 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10668 {
10669 if (is_a <gcall *> (stmt))
10670 {
10671 /* Ignore calls with no lhs. These must be calls to
10672 #pragma omp simd functions, and the vectorization factor
10673 they really need can't be determined until
10674 vectorizable_simd_clone_call. */
10675 if (dump_enabled_p ())
10676 dump_printf_loc (MSG_NOTE, vect_location,
10677 "defer to SIMD clone analysis.\n");
10678 return opt_result::success ();
10679 }
10680
10681 return opt_result::failure_at (stmt,
10682 "not vectorized: irregular stmt.%G", stmt);
10683 }
10684
10685 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10686 return opt_result::failure_at (stmt,
10687 "not vectorized: vector stmt in loop:%G",
10688 stmt);
10689
10690 tree vectype;
10691 tree scalar_type = NULL_TREE;
10692 if (STMT_VINFO_VECTYPE (stmt_info))
10693 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10694 else
10695 {
10696 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10697 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10698 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10699 else
10700 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10701
10702 /* Pure bool ops don't participate in number-of-units computation.
10703 For comparisons use the types being compared. */
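/* E.g. "b_1 = x_2 < y_3" on ints contributes the int type to the
   number-of-units computation below, whereas a pure boolean operation
   such as "b_1 = c_2 & d_3" is deferred (illustrative GIMPLE snippets). */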
10704 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10705 && is_gimple_assign (stmt)
10706 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10707 {
10708 *stmt_vectype_out = boolean_type_node;
10709
10710 tree rhs1 = gimple_assign_rhs1 (stmt);
10711 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10712 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10713 scalar_type = TREE_TYPE (rhs1);
10714 else
10715 {
10716 if (dump_enabled_p ())
10717 dump_printf_loc (MSG_NOTE, vect_location,
10718 "pure bool operation.\n");
10719 return opt_result::success ();
10720 }
10721 }
10722
10723 if (dump_enabled_p ())
10724 dump_printf_loc (MSG_NOTE, vect_location,
10725 "get vectype for scalar type: %T\n", scalar_type);
10726 vectype = get_vectype_for_scalar_type (scalar_type);
10727 if (!vectype)
10728 return opt_result::failure_at (stmt,
10729 "not vectorized:"
10730 " unsupported data-type %T\n",
10731 scalar_type);
10732
10733 if (!*stmt_vectype_out)
10734 *stmt_vectype_out = vectype;
10735
10736 if (dump_enabled_p ())
10737 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
10738 }
10739
10740 /* Don't try to compute scalar types if the stmt produces a boolean
10741 vector; use the existing vector type instead. */
10742 tree nunits_vectype;
10743 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10744 nunits_vectype = vectype;
10745 else
10746 {
10747 /* The number of units is set according to the smallest scalar
10748 type (or the largest vector size, but we only support one
10749 vector size per vectorization). */
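/* E.g. for "i_3 = (int) s_7" with a short s_7 and 128-bit vectors (an
   illustrative assumption), the smallest scalar type is "short", so
   NUNITS_VECTYPE becomes the 8-lane V8HI type even though the statement's
   own vectype is V4SI; both have the same size, so the check further
   below passes. */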
10750 if (*stmt_vectype_out != boolean_type_node)
10751 {
10752 HOST_WIDE_INT dummy;
10753 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10754 &dummy, &dummy);
10755 }
10756 if (dump_enabled_p ())
10757 dump_printf_loc (MSG_NOTE, vect_location,
10758 "get vectype for scalar type: %T\n", scalar_type);
10759 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10760 }
10761 if (!nunits_vectype)
10762 return opt_result::failure_at (stmt,
10763 "not vectorized: unsupported data-type %T\n",
10764 scalar_type);
10765
10766 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10767 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10768 return opt_result::failure_at (stmt,
10769 "not vectorized: different sized vector "
10770 "types in statement, %T and %T\n",
10771 vectype, nunits_vectype);
10772
10773 if (dump_enabled_p ())
10774 {
10775 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
10776 nunits_vectype);
10777
10778 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10779 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10780 dump_printf (MSG_NOTE, "\n");
10781 }
10782
10783 *nunits_vectype_out = nunits_vectype;
10784 return opt_result::success ();
10785 }
10786
10787 /* Try to determine the correct vector type for STMT_INFO, which is a
10788 statement that produces a scalar boolean result. Return the vector
10789 type on success, otherwise return NULL_TREE. */
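/* For example (assuming 128-bit vectors): for a comparison such as
   "flag_5 = a_1 < b_2" on 32-bit ints the result is the boolean vector
   type associated with V4SI, i.e. a 4-lane mask. */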
10790
10791 opt_tree
10792 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10793 {
10794 gimple *stmt = stmt_info->stmt;
10795 tree mask_type = NULL;
10796 tree vectype, scalar_type;
10797
10798 if (is_gimple_assign (stmt)
10799 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10800 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10801 {
10802 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10803 mask_type = get_mask_type_for_scalar_type (scalar_type);
10804
10805 if (!mask_type)
10806 return opt_tree::failure_at (stmt,
10807 "not vectorized: unsupported mask\n");
10808 }
10809 else
10810 {
10811 tree rhs;
10812 ssa_op_iter iter;
10813 enum vect_def_type dt;
10814
10815 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10816 {
10817 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10818 return opt_tree::failure_at (stmt,
10819 "not vectorized:can't compute mask"
10820 " type for statement, %G", stmt);
10821
10822 /* No vectype probably means an external definition.
10823 Allow it in case there is another operand from which
10824 the mask type can be determined. */
10825 if (!vectype)
10826 continue;
10827
10828 if (!mask_type)
10829 mask_type = vectype;
10830 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10831 TYPE_VECTOR_SUBPARTS (vectype)))
10832 return opt_tree::failure_at (stmt,
10833 "not vectorized: different sized mask"
10834 " types in statement, %T and %T\n",
10835 mask_type, vectype);
10836 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10837 != VECTOR_BOOLEAN_TYPE_P (vectype))
10838 return opt_tree::failure_at (stmt,
10839 "not vectorized: mixed mask and "
10840 "nonmask vector types in statement, "
10841 "%T and %T\n",
10842 mask_type, vectype);
10843 }
10844
10845 /* We may compare a boolean value loaded as a vector of integers.
10846 Fix mask_type in that case. */
10847 if (mask_type
10848 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10849 && gimple_code (stmt) == GIMPLE_ASSIGN
10850 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10851 mask_type = build_same_sized_truth_vector_type (mask_type);
10852 }
10853
10854 /* No mask_type here should mean a loop-invariant predicate.
10855 This is probably a subject for optimization in if-conversion. */
10856 if (!mask_type)
10857 return opt_tree::failure_at (stmt,
10858 "not vectorized: can't compute mask type "
10859 "for statement: %G", stmt);
10860
10861 return opt_tree::success (mask_type);
10862 }